From c7a663879dd1542ba1406e6938e77334cd74ff73 Mon Sep 17 00:00:00 2001
From: EmilyBourne <louise.bourne@gmail.com>
Date: Mon, 11 Mar 2024 11:41:27 +0100
Subject: [PATCH 001/130] Trigger tests on push to devel or main branch

---
 .github/workflows/anaconda_linux.yml   | 2 +-
 .github/workflows/anaconda_windows.yml | 2 +-
 .github/workflows/intel.yml            | 2 +-
 .github/workflows/linux.yml            | 2 +-
 .github/workflows/macosx.yml           | 2 +-
 .github/workflows/pickle.yml           | 2 +-
 .github/workflows/pickle_wheel.yml     | 2 +-
 .github/workflows/windows.yml          | 2 +-
 8 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/.github/workflows/anaconda_linux.yml b/.github/workflows/anaconda_linux.yml
index 5a5384e5ce..525903a54f 100644
--- a/.github/workflows/anaconda_linux.yml
+++ b/.github/workflows/anaconda_linux.yml
@@ -28,7 +28,7 @@ env:
 jobs:
   Python_version_picker:
     runs-on: ubuntu-latest
-    if: github.event_name != 'push' || github.repository == 'pyccel/pyccel'
+    if: github.event_name != 'push' || github.repository == 'pyccel/pyccel-cuda'
     outputs:
       python_version: ${{ steps.set-python_version.outputs.python_version }}
     steps:
diff --git a/.github/workflows/anaconda_windows.yml b/.github/workflows/anaconda_windows.yml
index 154a4d01e8..0f3f8a04ed 100644
--- a/.github/workflows/anaconda_windows.yml
+++ b/.github/workflows/anaconda_windows.yml
@@ -28,7 +28,7 @@ env:
 jobs:
   Python_version_picker:
     runs-on: windows-latest
-    if: github.event_name != 'push' || github.repository == 'pyccel/pyccel'
+    if: github.event_name != 'push' || github.repository == 'pyccel/pyccel-cuda'
     outputs:
       python_version: ${{ steps.set-python_version.outputs.python_version }}
     steps:
diff --git a/.github/workflows/intel.yml b/.github/workflows/intel.yml
index 977d5f9afd..5f340e1088 100644
--- a/.github/workflows/intel.yml
+++ b/.github/workflows/intel.yml
@@ -29,7 +29,7 @@ env:
 jobs:
   Python_version_picker:
     runs-on: ubuntu-latest
-    if: github.event_name != 'push' || github.repository == 'pyccel/pyccel'
+    if: github.event_name != 'push' || github.repository == 'pyccel/pyccel-cuda'
     outputs:
       python_version: ${{ steps.set-python_version.outputs.python_version }}
     steps:
diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml
index ad39cee725..664ae3aa60 100644
--- a/.github/workflows/linux.yml
+++ b/.github/workflows/linux.yml
@@ -28,7 +28,7 @@ env:
 jobs:
   matrix_prep:
     runs-on: ubuntu-latest
-    if: github.event_name != 'push' || github.repository == 'pyccel/pyccel'
+    if: github.event_name != 'push' || github.repository == 'pyccel/pyccel-cuda'
     outputs:
       matrix: ${{ steps.set-matrix.outputs.matrix }}
     steps:
diff --git a/.github/workflows/macosx.yml b/.github/workflows/macosx.yml
index 4768a64efa..f51041c0b8 100644
--- a/.github/workflows/macosx.yml
+++ b/.github/workflows/macosx.yml
@@ -28,7 +28,7 @@ env:
 jobs:
   Python_version_picker:
     runs-on: macos-latest
-    if: github.event_name != 'push' || github.repository == 'pyccel/pyccel'
+    if: github.event_name != 'push' || github.repository == 'pyccel/pyccel-cuda'
     outputs:
       python_version: ${{ steps.set-python_version.outputs.python_version }}
     steps:
diff --git a/.github/workflows/pickle.yml b/.github/workflows/pickle.yml
index 052028a5cb..cc3864afd2 100644
--- a/.github/workflows/pickle.yml
+++ b/.github/workflows/pickle.yml
@@ -31,7 +31,7 @@ env:
 jobs:
   Python_version_picker:
     runs-on: ubuntu-latest
-    if: github.event_name != 'push' || github.repository == 'pyccel/pyccel'
+    if: github.event_name != 'push' || github.repository == 'pyccel/pyccel-cuda'
     outputs:
       python_version: ${{ steps.set-matrix.outputs.python_version }}
       matrix: ${{ steps.set-matrix.outputs.matrix }}
diff --git a/.github/workflows/pickle_wheel.yml b/.github/workflows/pickle_wheel.yml
index 1dc82af503..718dc13dcc 100644
--- a/.github/workflows/pickle_wheel.yml
+++ b/.github/workflows/pickle_wheel.yml
@@ -28,7 +28,7 @@ env:
 jobs:
   Python_version_picker:
     runs-on: ubuntu-latest
-    if: github.event_name != 'push' || github.repository == 'pyccel/pyccel'
+    if: github.event_name != 'push' || github.repository == 'pyccel/pyccel-cuda'
     outputs:
       python_version: ${{ steps.set-python_version.outputs.python_version }}
     steps:
diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml
index 60c560ffee..827038a279 100644
--- a/.github/workflows/windows.yml
+++ b/.github/workflows/windows.yml
@@ -28,7 +28,7 @@ env:
 jobs:
   Python_version_picker:
     runs-on: windows-latest
-    if: github.event_name != 'push' || github.repository == 'pyccel/pyccel'
+    if: github.event_name != 'push' || github.repository == 'pyccel/pyccel-cuda'
     outputs:
       python_version: ${{ steps.set-python_version.outputs.python_version }}
     steps:

From 821a1c5ea3fd29387848c137f53b9ca34194b59c Mon Sep 17 00:00:00 2001
From: EmilyBourne <louise.bourne@gmail.com>
Date: Mon, 11 Mar 2024 11:46:33 +0100
Subject: [PATCH 002/130] Add cuda workflow to test cuda developments on CI

---
 .github/actions/coverage_install/action.yml |  2 +-
 .github/actions/linux_install/action.yml    | 10 +--
 .github/actions/pytest_run/action.yml       |  4 +-
 .github/actions/pytest_run_cuda/action.yml  | 17 +++++
 .github/actions/python_install/action.yml   | 17 +++++
 .github/workflows/cuda.yml                  | 83 +++++++++++++++++++++
 ci_tools/bot_messages/show_tests.txt        |  1 +
 ci_tools/bot_tools/bot_funcs.py             | 12 +--
 ci_tools/devel_branch_tests.py              |  1 +
 ci_tools/json_pytest_output.py              |  2 +-
 10 files changed, 135 insertions(+), 14 deletions(-)
 create mode 100644 .github/actions/pytest_run_cuda/action.yml
 create mode 100644 .github/actions/python_install/action.yml
 create mode 100644 .github/workflows/cuda.yml

diff --git a/.github/actions/coverage_install/action.yml b/.github/actions/coverage_install/action.yml
index ac5294e542..5732baee34 100644
--- a/.github/actions/coverage_install/action.yml
+++ b/.github/actions/coverage_install/action.yml
@@ -15,7 +15,7 @@ runs:
     - name: Directory Creation
       run: |
         INSTALL_DIR=$(cd tests; python -c "import pyccel; print(pyccel.__path__[0])")
-        SITE_DIR=$(python -c 'import sysconfig; print(sysconfig.get_paths()["purelib"])')
+        SITE_DIR=$(dirname ${INSTALL_DIR})
         echo -e "import coverage; coverage.process_startup()" > ${SITE_DIR}/pyccel_cov.pth
         echo -e "[run]\nparallel = True\nsource = ${INSTALL_DIR}\ndata_file = $(pwd)/.coverage\n[report]\ninclude = ${INSTALL_DIR}/*\n[xml]\noutput = cobertura.xml" > .coveragerc
         echo "SITE_DIR=${SITE_DIR}" >> $GITHUB_ENV
diff --git a/.github/actions/linux_install/action.yml b/.github/actions/linux_install/action.yml
index 8fb5cd8505..0ef9a69b8e 100644
--- a/.github/actions/linux_install/action.yml
+++ b/.github/actions/linux_install/action.yml
@@ -9,22 +9,22 @@ runs:
       shell: bash
     - name: Install fortran
       run:
-        sudo apt-get install gfortran
+        sudo apt-get install -y gfortran
       shell: bash
     - name: Install LaPack
       run:
-        sudo apt-get install libblas-dev liblapack-dev
+        sudo apt-get install -y libblas-dev liblapack-dev
       shell: bash
     - name: Install MPI
       run: |
-        sudo apt-get install libopenmpi-dev openmpi-bin
+        sudo apt-get install -y libopenmpi-dev openmpi-bin
         echo "MPI_OPTS=--oversubscribe" >> $GITHUB_ENV
       shell: bash
     - name: Install OpenMP
       run:
-        sudo apt-get install libomp-dev libomp5
+        sudo apt-get install -y libomp-dev libomp5
       shell: bash
     - name: Install Valgrind
       run:
-        sudo apt-get install valgrind
+        sudo apt-get install -y valgrind
       shell: bash
diff --git a/.github/actions/pytest_run/action.yml b/.github/actions/pytest_run/action.yml
index 0b6f0f988d..b0bdc31f16 100644
--- a/.github/actions/pytest_run/action.yml
+++ b/.github/actions/pytest_run/action.yml
@@ -51,13 +51,13 @@ runs:
       working-directory: ./tests
       id: pytest_3
     - name: Test Fortran translations
-      run: python -m pytest -n auto -rX ${FLAGS} -m "not (parallel or xdist_incompatible) and not (c or python) ${{ inputs.pytest_mark }}" --ignore=ndarrays 2>&1 | tee s4_outfile.out
+      run: python -m pytest -n auto -rX ${FLAGS} -m "not (parallel or xdist_incompatible) and not (c or python or ccuda) ${{ inputs.pytest_mark }}" --ignore=ndarrays 2>&1 | tee s4_outfile.out
       shell: ${{ inputs.shell_cmd }}
       working-directory: ./tests
       id: pytest_4
     - name: Test multi-file Fortran translations
       run: |
-        python -m pytest -rX ${FLAGS} -m "xdist_incompatible and not parallel and not (c or python) ${{ inputs.pytest_mark }}" --ignore=ndarrays 2>&1 | tee s5_outfile.out
+        python -m pytest -rX ${FLAGS} -m "xdist_incompatible and not parallel and not (c or python or ccuda) ${{ inputs.pytest_mark }}" --ignore=ndarrays 2>&1 | tee s5_outfile.out
         pyccel-clean
       shell: ${{ inputs.shell_cmd }}
       working-directory: ./tests
diff --git a/.github/actions/pytest_run_cuda/action.yml b/.github/actions/pytest_run_cuda/action.yml
new file mode 100644
index 0000000000..52092a6e02
--- /dev/null
+++ b/.github/actions/pytest_run_cuda/action.yml
@@ -0,0 +1,17 @@
+name: 'Pyccel pytest commands generating Ccuda'
+inputs:
+  shell_cmd:
+    description: 'Specifies the shell command (different for anaconda)'
+    required: false
+    default: "bash"
+
+runs:
+  using: "composite"
+  steps:
+    - name: Ccuda tests with pytest
+      run: |
+        # Catch exit 5 (no tests found)
+        sh -c 'python -m pytest -n auto -rx -m "not (parallel or xdist_incompatible) and ccuda" --ignore=symbolic --ignore=ndarrays; ret=$?; [ $ret = 5 ] && exit 0 || exit $ret'
+        pyccel-clean
+      shell: ${{ inputs.shell_cmd }}
+      working-directory: ./tests
diff --git a/.github/actions/python_install/action.yml b/.github/actions/python_install/action.yml
new file mode 100644
index 0000000000..f9b720e3e1
--- /dev/null
+++ b/.github/actions/python_install/action.yml
@@ -0,0 +1,17 @@
+name: 'Python installation commands'
+
+runs:
+  using: "composite"
+  steps:
+    - name: Install python
+      run:
+        sudo apt-get -y install python3-dev
+      shell: bash
+    - name: python as python3
+      run:
+        sudo apt-get -y install python-is-python3
+      shell: bash
+    - name: Install Pip
+      run:
+        sudo apt-get -y install python3-pip
+      shell: bash
diff --git a/.github/workflows/cuda.yml b/.github/workflows/cuda.yml
new file mode 100644
index 0000000000..833ebf5d85
--- /dev/null
+++ b/.github/workflows/cuda.yml
@@ -0,0 +1,83 @@
+name: Cuda unit tests
+
+on:
+  workflow_dispatch:
+    inputs:
+      python_version:
+        required: false
+        type: string
+      ref:
+        required: false
+        type: string
+      check_run_id:
+        required: false
+        type: string
+      pr_repo:
+        required: false
+        type: string
+  push:
+    branches: [devel, main]
+
+env:
+  COMMIT: ${{ inputs.ref || github.event.ref }}
+  PEM: ${{ secrets.BOT_PEM }}
+  GITHUB_RUN_ID: ${{ github.run_id }}
+  GITHUB_CHECK_RUN_ID: ${{ inputs.check_run_id }}
+  PR_REPO: ${{ inputs.pr_repo || github.repository }}
+
+jobs:
+  Cuda:
+
+    runs-on: ubuntu-20.04
+    name: Unit tests
+
+    container: nvidia/cuda:11.7.1-devel-ubuntu20.04
+    steps:
+      - uses: actions/checkout@v3
+        with:
+          ref: ${{ env.COMMIT }}
+          repository: ${{ env.PR_REPO }}
+      - name: Prepare docker
+        run: |
+          apt-get update && apt-get install -y sudo
+          TZ=Europe/Paris # must be a real tz database entry; Europe/France does not exist
+          ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
+          DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends tzdata
+        shell: bash
+      - name: Install python (setup-python action doesn't work with containers)
+        uses: ./.github/actions/python_install
+      - name: "Setup"
+        id: token
+        run: |
+          pip install jwt requests
+          python ci_tools/setup_check_run.py cuda
+      - name: CUDA Version
+        run: nvcc --version # cuda install check
+      - name: Install dependencies
+        uses: ./.github/actions/linux_install
+      - name: Install Pyccel with tests
+        run: |
+            PATH=${PATH}:$HOME/.local/bin
+            echo "PATH=${PATH}" >> $GITHUB_ENV
+            python -m pip install --upgrade pip
+            python -m pip install --user .[test]
+        shell: bash
+      - name: Coverage install
+        uses: ./.github/actions/coverage_install
+      - name: Ccuda tests with pytest
+        id: cuda_pytest
+        uses: ./.github/actions/pytest_run_cuda
+      - name: Collect coverage information
+        continue-on-error: True
+        uses: ./.github/actions/coverage_collection
+      - name: Save code coverage report
+        uses: actions/upload-artifact@v3
+        with:
+          name: coverage-artifact
+          path: .coverage
+          retention-days: 1
+      - name: "Post completed"
+        if: always()
+        run:
+          python ci_tools/complete_check_run.py ${{ steps.cuda_pytest.outcome }}
+
diff --git a/ci_tools/bot_messages/show_tests.txt b/ci_tools/bot_messages/show_tests.txt
index adc07e8431..eb15492d2e 100644
--- a/ci_tools/bot_messages/show_tests.txt
+++ b/ci_tools/bot_messages/show_tests.txt
@@ -2,6 +2,7 @@ The following is a list of keywords which can be used to run tests. Tests in bol
 - **linux** : Runs the unit tests on a Linux system.
 - **windows** : Runs the unit tests on a Windows system.
 - **macosx** : Runs the unit tests on a MacOS X system.
+- **cuda** : Runs the cuda unit tests on a Linux system.
 - **coverage** : Runs the unit tests on a Linux system and checks the coverage of the tests.
 - **docs** : Checks if the documentation follows the numpydoc format.
 - **pylint** : Runs pylint on files which are too big to be handled by codacy.
diff --git a/ci_tools/bot_tools/bot_funcs.py b/ci_tools/bot_tools/bot_funcs.py
index 7084a01bb9..1621d1d089 100644
--- a/ci_tools/bot_tools/bot_funcs.py
+++ b/ci_tools/bot_tools/bot_funcs.py
@@ -23,7 +23,8 @@
         'pyccel_lint': '3.8',
         'pylint': '3.8',
         'spelling': '3.8',
-        'windows': '3.8'
+        'windows': '3.8',
+        'cuda': '-'
         }
 
 test_names = {
@@ -40,15 +41,16 @@
         'pyccel_lint': "Pyccel best practices",
         'pylint': "Python linting",
         'spelling': "Spelling verification",
-        'windows': "Unit tests on Windows"
+        'windows': "Unit tests on Windows",
+        'cuda': "Unit tests on Linux with cuda"
         }
 
-test_dependencies = {'coverage':['linux']}
+test_dependencies = {'coverage':['linux', 'cuda']}
 
 tests_with_base = ('coverage', 'docs', 'pyccel_lint', 'pylint')
 
 pr_test_keys = ('linux', 'windows', 'macosx', 'coverage', 'docs', 'pylint',
-                'pyccel_lint', 'spelling')
+                'pyccel_lint', 'spelling', 'cuda')
 
 review_stage_labels = ["needs_initial_review", "Ready_for_review", "Ready_to_merge"]
 
@@ -420,7 +422,7 @@ def is_test_required(self, commit_log, name, key, state):
             True if the test should be run, False otherwise.
         """
         print("Checking : ", name, key)
-        if key in ('linux', 'windows', 'macosx', 'anaconda_linux', 'anaconda_windows', 'intel'):
+        if key in ('linux', 'windows', 'macosx', 'anaconda_linux', 'anaconda_windows', 'intel', 'cuda'):
             has_relevant_change = lambda diff: any((f.startswith('pyccel/') or f.startswith('tests/')) #pylint: disable=unnecessary-lambda-assignment
                                                     and f.endswith('.py') and f != 'pyccel/version.py'
                                                     for f in diff)
diff --git a/ci_tools/devel_branch_tests.py b/ci_tools/devel_branch_tests.py
index 1102ef9e92..ec67b6c49a 100644
--- a/ci_tools/devel_branch_tests.py
+++ b/ci_tools/devel_branch_tests.py
@@ -15,3 +15,4 @@
     bot.run_tests(['anaconda_linux'], '3.10', force_run = True)
     bot.run_tests(['anaconda_windows'], '3.10', force_run = True)
     bot.run_tests(['intel'], '3.9', force_run = True)
+    bot.run_tests(['cuda'], '-', force_run = True)
diff --git a/ci_tools/json_pytest_output.py b/ci_tools/json_pytest_output.py
index 409ae76d72..b84f4a4c09 100644
--- a/ci_tools/json_pytest_output.py
+++ b/ci_tools/json_pytest_output.py
@@ -61,7 +61,7 @@ def     mini_md_summary(title, outcome, failed_tests):
     summary = ""
 
     failed_pattern = re.compile(r".*FAILED.*")
-    languages = ('c', 'fortran', 'python')
+    languages = ('c', 'fortran', 'python', 'cuda')
     pattern = {lang: re.compile(r".*\["+lang+r"\]\ \_.*") for lang in languages}
 
     for i in p_args.tests:

From 092b557cf0ead7c949731adf40f0acd6678dbe66 Mon Sep 17 00:00:00 2001
From: EmilyBourne <louise.bourne@gmail.com>
Date: Mon, 11 Mar 2024 11:41:27 +0100
Subject: [PATCH 003/130] Trigger tests on push to devel or main branch

---
 .github/workflows/deploy.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml
index 9111b47d52..cf52b1c624 100644
--- a/.github/workflows/deploy.yml
+++ b/.github/workflows/deploy.yml
@@ -10,7 +10,7 @@ jobs:
   waitForWorklows:
     name: Wait for workflows
     runs-on: ubuntu-latest
-    if: github.event.workflow_run.head_branch == 'main'
+    if: github.event.workflow_run.head_branch == 'main' && github.repository == 'pyccel/pyccel'
     steps:
       - name: Checkout repository
         uses: actions/checkout@v4

From 02a2360e41a3f3d09b31e271609dbe642c13ac01 Mon Sep 17 00:00:00 2001
From: bauom <40796259+bauom@users.noreply.github.com>
Date: Wed, 28 Feb 2024 18:11:50 +0100
Subject: [PATCH 004/130] [init] Adding CUDA language/compiler and CodePrinter
 (#32)

This PR aims to make the C code compilable using nvcc. The cuda language was added as well as a CudaCodePrinter.

Changes to stdlib:

Wrapped expressions using complex types in an `ifndef __NVCC__` to avoid processing them with the nvcc compiler

---------

Co-authored-by: Mouad Elalj, EmilyBourne
---
 .dict_custom.txt                           |   1 +
 .github/actions/pytest_parallel/action.yml |   4 +-
 .github/actions/pytest_run/action.yml      |   4 +-
 .github/actions/pytest_run_cuda/action.yml |  11 +-
 CHANGELOG.md                               |   6 +
 pyccel/codegen/codegen.py                  |   8 +-
 pyccel/codegen/compiling/compilers.py      |   5 +-
 pyccel/codegen/pipeline.py                 |   5 +-
 pyccel/codegen/printing/cucode.py          |  74 +++++++++++
 pyccel/commands/console.py                 |   2 +-
 pyccel/compilers/default_compilers.py      |  13 +-
 pyccel/naming/__init__.py                  |   4 +-
 pyccel/naming/cudanameclashchecker.py      |  92 ++++++++++++++
 pyccel/stdlib/numpy/numpy_c.c              |   2 +
 pyccel/stdlib/numpy/numpy_c.h              |   2 +
 pytest.ini                                 |   1 +
 tests/conftest.py                          |  11 ++
 tests/epyccel/test_base.py                 | 136 ++++++++++-----------
 18 files changed, 298 insertions(+), 83 deletions(-)
 create mode 100644 pyccel/codegen/printing/cucode.py
 create mode 100644 pyccel/naming/cudanameclashchecker.py

diff --git a/.dict_custom.txt b/.dict_custom.txt
index 82a6b10d31..ae99f31ed4 100644
--- a/.dict_custom.txt
+++ b/.dict_custom.txt
@@ -110,6 +110,7 @@ Valgrind
 variadic
 subclasses
 oneAPI
+Cuda
 getter
 setter
 bitwise
diff --git a/.github/actions/pytest_parallel/action.yml b/.github/actions/pytest_parallel/action.yml
index c7c77d99c7..f91d84915b 100644
--- a/.github/actions/pytest_parallel/action.yml
+++ b/.github/actions/pytest_parallel/action.yml
@@ -10,8 +10,8 @@ runs:
   steps:
     - name: Test with pytest
       run: |
-        mpiexec -n 4 ${MPI_OPTS} python -m pytest epyccel/test_parallel_epyccel.py -v -m parallel -rXx
-        #mpiexec -n 4 ${MPI_OPTS} python -m pytest epyccel -v -m parallel -rXx
+        mpiexec -n 4 ${MPI_OPTS} python -m pytest epyccel/test_parallel_epyccel.py -v -m "parallel and not cuda" -rXx
+        #mpiexec -n 4 ${MPI_OPTS} python -m pytest epyccel -v -m "parallel and not cuda" -rXx
       shell: ${{ inputs.shell_cmd }}
       working-directory: ./tests
 
diff --git a/.github/actions/pytest_run/action.yml b/.github/actions/pytest_run/action.yml
index b0bdc31f16..451fa39e92 100644
--- a/.github/actions/pytest_run/action.yml
+++ b/.github/actions/pytest_run/action.yml
@@ -51,13 +51,13 @@ runs:
       working-directory: ./tests
       id: pytest_3
     - name: Test Fortran translations
-      run: python -m pytest -n auto -rX ${FLAGS} -m "not (parallel or xdist_incompatible) and not (c or python or ccuda) ${{ inputs.pytest_mark }}" --ignore=ndarrays 2>&1 | tee s4_outfile.out
+      run: python -m pytest -n auto -rX ${FLAGS} -m "not (parallel or xdist_incompatible) and not (c or python or cuda) ${{ inputs.pytest_mark }}" --ignore=ndarrays 2>&1 | tee s4_outfile.out
       shell: ${{ inputs.shell_cmd }}
       working-directory: ./tests
       id: pytest_4
     - name: Test multi-file Fortran translations
       run: |
-        python -m pytest -rX ${FLAGS} -m "xdist_incompatible and not parallel and not (c or python or ccuda) ${{ inputs.pytest_mark }}" --ignore=ndarrays 2>&1 | tee s5_outfile.out
+        python -m pytest -rX ${FLAGS} -m "xdist_incompatible and not parallel and not (c or python or cuda) ${{ inputs.pytest_mark }}" --ignore=ndarrays 2>&1 | tee s5_outfile.out
         pyccel-clean
       shell: ${{ inputs.shell_cmd }}
       working-directory: ./tests
diff --git a/.github/actions/pytest_run_cuda/action.yml b/.github/actions/pytest_run_cuda/action.yml
index 52092a6e02..46f90552ed 100644
--- a/.github/actions/pytest_run_cuda/action.yml
+++ b/.github/actions/pytest_run_cuda/action.yml
@@ -1,4 +1,4 @@
-name: 'Pyccel pytest commands generating Ccuda'
+name: 'Pyccel pytest commands generating Cuda'
 inputs:
   shell_cmd:
     description: 'Specifies the shell command (different for anaconda)'
@@ -11,7 +11,14 @@ runs:
     - name: Ccuda tests with pytest
       run: |
         # Catch exit 5 (no tests found)
-        sh -c 'python -m pytest -n auto -rx -m "not (parallel or xdist_incompatible) and ccuda" --ignore=symbolic --ignore=ndarrays; ret=$?; [ $ret = 5 ] && exit 0 || exit $ret'
+        python -m pytest -rX ${FLAGS} -m "not (xdist_incompatible or parallel) and cuda ${{ inputs.pytest_mark }}" --ignore=symbolic --ignore=ndarrays 2>&1 | tee s1_outfile.out
         pyccel-clean
       shell: ${{ inputs.shell_cmd }}
       working-directory: ./tests
+      id: pytest_1 # lets the summary step read this step's outcome
+    - name: Final step
+      if: always()
+      id: status
+      run:
+        python ci_tools/json_pytest_output.py -t "Cuda Test Summary" --tests "Cuda tests:${{ steps.pytest_1.outcome }}:tests/s1_outfile.out"
+      shell: ${{ inputs.shell_cmd }}
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 4807a17474..ce9212abc6 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,6 +1,12 @@
 # Change Log
 All notable changes to this project will be documented in this file.
 
+## \[Cuda - UNRELEASED\]
+
+### Added
+
+-   #32 : add support for `nvcc` Compiler and `cuda` language as a possible option.
+
 ## \[UNRELEASED\]
 
 ### Added
diff --git a/pyccel/codegen/codegen.py b/pyccel/codegen/codegen.py
index daf4559df4..8d4abb6bdb 100644
--- a/pyccel/codegen/codegen.py
+++ b/pyccel/codegen/codegen.py
@@ -9,16 +9,18 @@
 from pyccel.codegen.printing.fcode  import FCodePrinter
 from pyccel.codegen.printing.ccode  import CCodePrinter
 from pyccel.codegen.printing.pycode import PythonCodePrinter
+from pyccel.codegen.printing.cucode import CudaCodePrinter
 
 from pyccel.ast.core      import FunctionDef, Interface, ModuleHeader
 from pyccel.utilities.stage import PyccelStage
 
-_extension_registry = {'fortran': 'f90', 'c':'c',  'python':'py'}
-_header_extension_registry = {'fortran': None, 'c':'h',  'python':None}
+_extension_registry = {'fortran': 'f90', 'c':'c',  'python':'py', 'cuda':'cu'}
+_header_extension_registry = {'fortran': None, 'c':'h',  'python':None, 'cuda':'h'}
 printer_registry    = {
                         'fortran':FCodePrinter,
                         'c':CCodePrinter,
-                        'python':PythonCodePrinter
+                        'python':PythonCodePrinter,
+                        'cuda':CudaCodePrinter
                       }
 
 pyccel_stage = PyccelStage()
diff --git a/pyccel/codegen/compiling/compilers.py b/pyccel/codegen/compiling/compilers.py
index fca93c5624..ef11579e49 100644
--- a/pyccel/codegen/compiling/compilers.py
+++ b/pyccel/codegen/compiling/compilers.py
@@ -441,7 +441,10 @@ def compile_shared_library(self, compile_obj, output_folder, verbose = False, sh
         # Collect compile information
         exec_cmd, includes, libs_flags, libdirs_flags, m_code = \
                 self._get_compile_components(compile_obj, accelerators)
-        linker_libdirs_flags = ['-Wl,-rpath' if l == '-L' else l for l in libdirs_flags]
+        if self._info['exec'] == 'nvcc':
+            linker_libdirs_flags = ['-Xcompiler' if l == '-L' else f'"-Wl,-rpath,{l}"' for l in libdirs_flags]
+        else:
+            linker_libdirs_flags = ['-Wl,-rpath' if l == '-L' else l for l in libdirs_flags]
 
         flags.insert(0,"-shared")
 
diff --git a/pyccel/codegen/pipeline.py b/pyccel/codegen/pipeline.py
index c0f8634e03..1e9d0e327d 100644
--- a/pyccel/codegen/pipeline.py
+++ b/pyccel/codegen/pipeline.py
@@ -180,9 +180,10 @@ def handle_error(stage):
     if language is None:
         language = 'fortran'
 
-    # Choose Fortran compiler
+    # Choose Default compiler
     if compiler is None:
-        compiler = os.environ.get('PYCCEL_DEFAULT_COMPILER', 'GNU')
+        default_compiler_family = 'nvidia' if language == 'cuda' else 'GNU'
+        compiler = os.environ.get('PYCCEL_DEFAULT_COMPILER', default_compiler_family)
 
     fflags = [] if fflags is None else fflags.split()
     wrapper_flags = [] if wrapper_flags is None else wrapper_flags.split()
diff --git a/pyccel/codegen/printing/cucode.py b/pyccel/codegen/printing/cucode.py
new file mode 100644
index 0000000000..86146b065b
--- /dev/null
+++ b/pyccel/codegen/printing/cucode.py
@@ -0,0 +1,74 @@
+# coding: utf-8
+#------------------------------------------------------------------------------------------#
+# This file is part of Pyccel which is released under MIT License. See the LICENSE file or #
+# go to https://github.com/pyccel/pyccel/blob/master/LICENSE for full license details.     #
+#------------------------------------------------------------------------------------------#
+"""
+Provide tools for generating and handling CUDA code.
+This module is designed to interface Pyccel's Abstract Syntax Tree (AST) with CUDA,
+enabling the direct translation of high-level Pyccel expressions into CUDA code.
+"""
+
+from pyccel.codegen.printing.ccode import CCodePrinter, c_library_headers
+
+from pyccel.ast.core        import Import, Module
+
+from pyccel.errors.errors   import Errors
+
+
+errors = Errors()
+
+__all__ = ["CudaCodePrinter"]
+
+class CudaCodePrinter(CCodePrinter):
+    """
+    Print code in CUDA format.
+
+    This printer converts Pyccel's Abstract Syntax Tree (AST) into strings of CUDA code.
+    Navigation through this file utilizes _print_X functions,
+    as is common with all printers.
+
+    Parameters
+    ----------
+    filename : str
+            The name of the file being pyccelised.
+    prefix_module : str
+            A prefix to be added to the name of the module.
+    """
+    language = "cuda"
+
+    def __init__(self, filename, prefix_module = None):
+        errors.set_target(filename)
+
+        # Forward prefix_module: it is a documented parameter and must not be silently dropped.
+        super().__init__(filename, prefix_module = prefix_module)
+
+    def _print_Module(self, expr):
+        self.set_scope(expr.scope)
+        self._current_module = expr.name
+        body = ''.join(self._print(i) for i in expr.body)
+
+        global_variables = ''.join(self._print(d) for d in expr.declarations)
+
+        # Print imports last to be sure that all additional_imports have been collected
+        imports = [Import(expr.name, Module(expr.name,(),())), *self._additional_imports.values()]
+        c_headers_imports = ''
+        local_imports = ''
+
+        for imp in imports:
+            if imp.source in c_library_headers:
+                c_headers_imports += self._print(imp)
+            else:
+                local_imports += self._print(imp)
+        # Non C-library imports go inside extern "C"; the continuations keep this indentation in the output.
+        imports = f'{c_headers_imports}\
+                    extern "C"{{\n\
+                    {local_imports}\
+                    }}'
+
+        code = f'{imports}\n\
+                 {global_variables}\n\
+                 {body}\n'
+
+        self.exit_scope()
+        return code
diff --git a/pyccel/commands/console.py b/pyccel/commands/console.py
index 596c440ec0..fcbec009de 100644
--- a/pyccel/commands/console.py
+++ b/pyccel/commands/console.py
@@ -80,7 +80,7 @@ def pyccel(files=None, mpi=None, openmp=None, openacc=None, output_dir=None, com
     # ... backend compiler options
     group = parser.add_argument_group('Backend compiler options')
 
-    group.add_argument('--language', choices=('fortran', 'c', 'python'), help='Generated language')
+    group.add_argument('--language', choices=('fortran', 'c', 'python', 'cuda'), help='Generated language')
 
     group.add_argument('--compiler', help='Compiler family or json file containing a compiler description {GNU,intel,PGI}')
 
diff --git a/pyccel/compilers/default_compilers.py b/pyccel/compilers/default_compilers.py
index 166085d22e..d47856773c 100644
--- a/pyccel/compilers/default_compilers.py
+++ b/pyccel/compilers/default_compilers.py
@@ -185,6 +185,15 @@
                 },
             'family': 'nvidia',
             }
+#------------------------------------------------------------
+nvcc_info = {'exec'         : 'nvcc',
+             'language'     : 'cuda',
+             'debug_flags'  : ("-g",),
+             'release_flags': ("-O3",),
+             'general_flags': ('--compiler-options', '-fPIC',),
+             'family'       : 'nvidia'
+            }
+
 
 #------------------------------------------------------------
 def change_to_lib_flag(lib):
@@ -288,6 +297,7 @@ def change_to_lib_flag(lib):
 pgfortran_info.update(python_info)
 nvc_info.update(python_info)
 nvfort_info.update(python_info)
+nvcc_info.update(python_info)
 
 available_compilers = {('GNU', 'c') : gcc_info,
                        ('GNU', 'fortran') : gfort_info,
@@ -296,6 +306,7 @@ def change_to_lib_flag(lib):
                        ('PGI', 'c') : pgcc_info,
                        ('PGI', 'fortran') : pgfortran_info,
                        ('nvidia', 'c') : nvc_info,
-                       ('nvidia', 'fortran') : nvfort_info}
+                       ('nvidia', 'fortran') : nvfort_info,
+                       ('nvidia', 'cuda'): nvcc_info}
 
 vendors = ('GNU','intel','PGI','nvidia')
diff --git a/pyccel/naming/__init__.py b/pyccel/naming/__init__.py
index 72c318d3ad..b3e4bbbe0e 100644
--- a/pyccel/naming/__init__.py
+++ b/pyccel/naming/__init__.py
@@ -10,7 +10,9 @@
 from .fortrannameclashchecker import FortranNameClashChecker
 from .cnameclashchecker import CNameClashChecker
 from .pythonnameclashchecker import PythonNameClashChecker
+from .cudanameclashchecker import CudaNameClashChecker
 
 name_clash_checkers = {'fortran':FortranNameClashChecker(),
         'c':CNameClashChecker(),
-        'python':PythonNameClashChecker()}
+        'python':PythonNameClashChecker(),
+        'cuda':CudaNameClashChecker()}
diff --git a/pyccel/naming/cudanameclashchecker.py b/pyccel/naming/cudanameclashchecker.py
new file mode 100644
index 0000000000..971204e912
--- /dev/null
+++ b/pyccel/naming/cudanameclashchecker.py
@@ -0,0 +1,92 @@
+# coding: utf-8
+#------------------------------------------------------------------------------------------#
+# This file is part of Pyccel which is released under MIT License. See the LICENSE file or #
+# go to https://github.com/pyccel/pyccel/blob/master/LICENSE for full license details.     #
+#------------------------------------------------------------------------------------------#
+"""
+Handles name clash problems in Cuda
+"""
+from .languagenameclashchecker import LanguageNameClashChecker
+
+class CudaNameClashChecker(LanguageNameClashChecker):
+    """
+    Class containing functions to help avoid problematic names in Cuda.
+
+    A class which provides functionalities to check or propose variable names and
+    verify that they do not cause name clashes. Name clashes may be due to
+    new variables, or due to the use of reserved keywords.
+    """
+    # C keywords (https://en.cppreference.com/w/c/keyword) plus names used by the Pyccel stdlib (e.g. isign, fsign, csign)
+    keywords = set(['isign', 'fsign', 'csign', 'auto', 'break', 'case', 'char', 'const',
+        'continue', 'default', 'do', 'double', 'else', 'enum',
+        'extern', 'float', 'for', 'goto', 'if', 'inline', 'int',
+        'long', 'register', 'restrict', 'return', 'short', 'signed',
+        'sizeof', 'static', 'struct', 'switch', 'typedef', 'union',
+        'unsigned', 'void', 'volatile', 'while', '_Alignas',
+        '_Alignof', '_Atomic', '_Bool', '_Complex', '_Decimal128',
+        '_Decimal32', '_Decimal64', '_Generic', '_Imaginary',
+        '_Noreturn', '_Static_assert', '_Thread_local', 't_ndarray',
+        'array_create', 'new_slice', 'array_slicing', 'alias_assign',
+        'transpose_alias_assign', 'array_fill', 't_slice',
+        'GET_INDEX_EXP1', 'GET_INDEX_EXP2',
+        'GET_INDEX_EXP3', 'GET_INDEX_EXP4', 'GET_INDEX_EXP5',
+        'GET_INDEX_EXP6', 'GET_INDEX_EXP7', 'GET_INDEX_EXP8',
+        'GET_INDEX_EXP9', 'GET_INDEX_EXP10', 'GET_INDEX_EXP11',
+        'GET_INDEX_EXP12', 'GET_INDEX_EXP13', 'GET_INDEX_EXP14',
+        'GET_INDEX_EXP15', 'NUM_ARGS_H1', 'NUM_ARGS',
+        'GET_INDEX_FUNC_H2', 'GET_INDEX_FUNC', 'GET_INDEX',
+        'INDEX', 'GET_ELEMENT', 'free_array', 'free_pointer',
+        'get_index', 'numpy_to_ndarray_strides',
+        'numpy_to_ndarray_shape', 'get_size', 'order_f', 'order_c', 'array_copy_data'])
+
+    def has_clash(self, name, symbols):
+        """
+        Indicate whether the proposed name causes any clashes.
+
+        Checks if a suggested name conflicts with predefined
+        keywords or specified symbols, returning true for a clash.
+        This method is crucial for maintaining namespace integrity and
+        preventing naming conflicts in code generation processes.
+
+        Parameters
+        ----------
+        name : str
+            The suggested name.
+        symbols : set
+            Symbols which should be considered as collisions.
+
+        Returns
+        -------
+        bool
+            True if the name is a collision.
+            False if the name is collision free.
+        """
+        return any(name == k for k in self.keywords) or \
+               any(name == s for s in symbols)
+
+    def get_collisionless_name(self, name, symbols):
+        """
+        Get a valid name which doesn't collide with symbols or Cuda keywords.
+
+        Find a new name based on the suggested name which will not cause
+        conflicts with Cuda keywords, does not appear in the provided symbols,
+        and is a valid name in Cuda code.
+
+        Parameters
+        ----------
+        name : str
+            The suggested name.
+        symbols : set
+            Symbols which should be considered as collisions.
+
+        Returns
+        -------
+        str
+            A new name which is collision free.
+        """
+        if len(name)>4 and all(name[i] == '_' for i in (0,1,-1,-2)):
+            # Ignore magic methods
+            return name
+        if name[0] == '_':
+            name = 'private'+name
+        return self._get_collisionless_name(name, symbols)
diff --git a/pyccel/stdlib/numpy/numpy_c.c b/pyccel/stdlib/numpy/numpy_c.c
index 7c9ecbbf6b..bc56214772 100644
--- a/pyccel/stdlib/numpy/numpy_c.c
+++ b/pyccel/stdlib/numpy/numpy_c.c
@@ -17,8 +17,10 @@ double  fsign(double x)
     return SIGN(x);
 }
 
+#ifndef __NVCC__
 /* numpy.sign for complex */
 double complex csign(double complex x)
 {
     return x ? ((!creal(x) && cimag(x) < 0) || (creal(x) < 0) ? -1 : 1) : 0;
 }
+#endif
diff --git a/pyccel/stdlib/numpy/numpy_c.h b/pyccel/stdlib/numpy/numpy_c.h
index e72cf3ad57..c2a16a5516 100644
--- a/pyccel/stdlib/numpy/numpy_c.h
+++ b/pyccel/stdlib/numpy/numpy_c.h
@@ -15,6 +15,8 @@
 
 long long int isign(long long int x);
 double fsign(double x);
+#ifndef __NVCC__
 double complex csign(double complex x);
+#endif
 
 #endif
diff --git a/pytest.ini b/pytest.ini
index 42eb0d72ba..3792ab65f9 100644
--- a/pytest.ini
+++ b/pytest.ini
@@ -9,3 +9,4 @@ markers =
     python: test to generate python code
     xdist_incompatible: test which compiles a file also compiled by another test
     external: test using an external dll (problematic with conda on Windows)
+    cuda: test to generate cuda code
diff --git a/tests/conftest.py b/tests/conftest.py
index 79144b6978..a5082ef6e8 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -21,6 +21,17 @@
 def language(request):
     return request.param
 
+@pytest.fixture( params=[
+        pytest.param("fortran", marks = pytest.mark.fortran),
+        pytest.param("c", marks = pytest.mark.c),
+        pytest.param("python", marks = pytest.mark.python),
+        pytest.param("cuda", marks = pytest.mark.cuda)
+    ],
+    scope = "session"
+)
+def language_with_cuda(request):
+    return request.param
+
 def move_coverage(path_dir):
     for root, _, files in os.walk(path_dir):
         for name in files:
diff --git a/tests/epyccel/test_base.py b/tests/epyccel/test_base.py
index c22064d321..413f79eef1 100644
--- a/tests/epyccel/test_base.py
+++ b/tests/epyccel/test_base.py
@@ -7,128 +7,128 @@
 from utilities import epyccel_test
 
 
-def test_is_false(language):
-    test = epyccel_test(base.is_false, lang=language)
+def test_is_false(language_with_cuda):
+    test = epyccel_test(base.is_false, lang=language_with_cuda)
     test.compare_epyccel( True )
     test.compare_epyccel( False )
 
-def test_is_true(language):
-    test = epyccel_test(base.is_true, lang=language)
+def test_is_true(language_with_cuda):
+    test = epyccel_test(base.is_true, lang=language_with_cuda)
     test.compare_epyccel( True )
     test.compare_epyccel( False )
 
-def test_compare_is(language):
-    test = epyccel_test(base.compare_is, lang=language)
+def test_compare_is(language_with_cuda):
+    test = epyccel_test(base.compare_is, lang=language_with_cuda)
     test.compare_epyccel( True, True )
     test.compare_epyccel( True, False )
     test.compare_epyccel( False, True )
     test.compare_epyccel( False, False )
 
-def test_compare_is_not(language):
-    test = epyccel_test(base.compare_is_not, lang=language)
+def test_compare_is_not(language_with_cuda):
+    test = epyccel_test(base.compare_is_not, lang=language_with_cuda)
     test.compare_epyccel( True, True )
     test.compare_epyccel( True, False )
     test.compare_epyccel( False, True )
     test.compare_epyccel( False, False )
 
-def test_compare_is_int(language):
-    test = epyccel_test(base.compare_is_int, lang=language)
+def test_compare_is_int(language_with_cuda):
+    test = epyccel_test(base.compare_is_int, lang=language_with_cuda)
     test.compare_epyccel( True, 1 )
     test.compare_epyccel( True, 0 )
     test.compare_epyccel( False, 1 )
     test.compare_epyccel( False, 0 )
 
-def test_compare_is_not_int(language):
-    test = epyccel_test(base.compare_is_not_int, lang=language)
+def test_compare_is_not_int(language_with_cuda):
+    test = epyccel_test(base.compare_is_not_int, lang=language_with_cuda)
     test.compare_epyccel( True, 1 )
     test.compare_epyccel( True, 0 )
     test.compare_epyccel( False, 1 )
     test.compare_epyccel( False, 0 )
 
-def test_not_false(language):
-    test = epyccel_test(base.not_false, lang=language)
+def test_not_false(language_with_cuda):
+    test = epyccel_test(base.not_false, lang=language_with_cuda)
     test.compare_epyccel( True )
     test.compare_epyccel( False )
 
-def test_not_true(language):
-    test = epyccel_test(base.not_true, lang=language)
+def test_not_true(language_with_cuda):
+    test = epyccel_test(base.not_true, lang=language_with_cuda)
     test.compare_epyccel( True )
     test.compare_epyccel( False )
 
-def test_eq_false(language):
-    test = epyccel_test(base.eq_false, lang=language)
+def test_eq_false(language_with_cuda):
+    test = epyccel_test(base.eq_false, lang=language_with_cuda)
     test.compare_epyccel( True )
     test.compare_epyccel( False )
 
-def test_eq_true(language):
-    test = epyccel_test(base.eq_true, lang=language)
+def test_eq_true(language_with_cuda):
+    test = epyccel_test(base.eq_true, lang=language_with_cuda)
     test.compare_epyccel( True )
     test.compare_epyccel( False )
 
-def test_neq_false(language):
-    test = epyccel_test(base.eq_false, lang=language)
+def test_neq_false(language_with_cuda):
+    test = epyccel_test(base.eq_false, lang=language_with_cuda)
     test.compare_epyccel( True )
     test.compare_epyccel( False )
 
-def test_neq_true(language):
-    test = epyccel_test(base.eq_true, lang=language)
+def test_neq_true(language_with_cuda):
+    test = epyccel_test(base.eq_true, lang=language_with_cuda)
     test.compare_epyccel( True )
     test.compare_epyccel( False )
 
-def test_not(language):
-    test = epyccel_test(base.not_val, lang=language)
+def test_not(language_with_cuda):
+    test = epyccel_test(base.not_val, lang=language_with_cuda)
     test.compare_epyccel( True )
     test.compare_epyccel( False )
 
-def test_not_int(language):
-    test = epyccel_test(base.not_int, lang=language)
+def test_not_int(language_with_cuda):
+    test = epyccel_test(base.not_int, lang=language_with_cuda)
     test.compare_epyccel( 0 )
     test.compare_epyccel( 4 )
 
-def test_compare_is_nil(language):
-    test = epyccel_test(base.is_nil, lang=language)
+def test_compare_is_nil(language_with_cuda):
+    test = epyccel_test(base.is_nil, lang=language_with_cuda)
     test.compare_epyccel( None )
 
-def test_compare_is_not_nil(language):
-    test = epyccel_test(base.is_not_nil, lang=language)
+def test_compare_is_not_nil(language_with_cuda):
+    test = epyccel_test(base.is_not_nil, lang=language_with_cuda)
     test.compare_epyccel( None )
 
-def test_cast_int(language):
-    test = epyccel_test(base.cast_int, lang=language)
+def test_cast_int(language_with_cuda):
+    test = epyccel_test(base.cast_int, lang=language_with_cuda)
     test.compare_epyccel( 4 )
-    test = epyccel_test(base.cast_float_to_int, lang=language)
+    test = epyccel_test(base.cast_float_to_int, lang=language_with_cuda)
     test.compare_epyccel( 4.5 )
 
-def test_cast_bool(language):
-    test = epyccel_test(base.cast_bool, lang=language)
+def test_cast_bool(language_with_cuda):
+    test = epyccel_test(base.cast_bool, lang=language_with_cuda)
     test.compare_epyccel( True )
 
-def test_cast_float(language):
-    test = epyccel_test(base.cast_float, lang=language)
+def test_cast_float(language_with_cuda):
+    test = epyccel_test(base.cast_float, lang=language_with_cuda)
     test.compare_epyccel( 4.5 )
-    test = epyccel_test(base.cast_int_to_float, lang=language)
+    test = epyccel_test(base.cast_int_to_float, lang=language_with_cuda)
     test.compare_epyccel( 4 )
 
-def test_if_0_int(language):
-    test = epyccel_test(base.if_0_int, lang=language)
+def test_if_0_int(language_with_cuda):
+    test = epyccel_test(base.if_0_int, lang=language_with_cuda)
     test.compare_epyccel( 22 )
     test.compare_epyccel( 0 )
 
-def test_if_0_real(language):
-    test = epyccel_test(base.if_0_real, lang=language)
+def test_if_0_real(language_with_cuda):
+    test = epyccel_test(base.if_0_real, lang=language_with_cuda)
     test.compare_epyccel( 22.3 )
     test.compare_epyccel( 0.0 )
 
-def test_same_int(language):
-    test = epyccel_test(base.is_same_int, lang=language)
+def test_same_int(language_with_cuda):
+    test = epyccel_test(base.is_same_int, lang=language_with_cuda)
     test.compare_epyccel( 22 )
-    test = epyccel_test(base.isnot_same_int, lang=language)
+    test = epyccel_test(base.isnot_same_int, lang=language_with_cuda)
     test.compare_epyccel( 22 )
 
-def test_same_float(language):
-    test = epyccel_test(base.is_same_float, lang=language)
+def test_same_float(language_with_cuda):
+    test = epyccel_test(base.is_same_float, lang=language_with_cuda)
     test.compare_epyccel( 22.2 )
-    test = epyccel_test(base.isnot_same_float, lang=language)
+    test = epyccel_test(base.isnot_same_float, lang=language_with_cuda)
     test.compare_epyccel( 22.2 )
 
 @pytest.mark.parametrize( 'language', [
@@ -150,28 +150,28 @@ def test_same_complex(language):
     test = epyccel_test(base.isnot_same_complex, lang=language)
     test.compare_epyccel( complex(2,3) )
 
-def test_is_types(language):
-    test = epyccel_test(base.is_types, lang=language)
+def test_is_types(language_with_cuda):
+    test = epyccel_test(base.is_types, lang=language_with_cuda)
     test.compare_epyccel( 1, 1.0 )
 
-def test_isnot_types(language):
-    test = epyccel_test(base.isnot_types, lang=language)
+def test_isnot_types(language_with_cuda):
+    test = epyccel_test(base.isnot_types, lang=language_with_cuda)
     test.compare_epyccel( 1, 1.0 )
 
-def test_none_is_none(language):
-    test = epyccel_test(base.none_is_none, lang=language)
+def test_none_is_none(language_with_cuda):
+    test = epyccel_test(base.none_is_none, lang=language_with_cuda)
     test.compare_epyccel()
 
-def test_none_isnot_none(language):
-    test = epyccel_test(base.none_isnot_none, lang=language)
+def test_none_isnot_none(language_with_cuda):
+    test = epyccel_test(base.none_isnot_none, lang=language_with_cuda)
     test.compare_epyccel()
 
-def test_pass_if(language):
-    test = epyccel_test(base.pass_if, lang=language)
+def test_pass_if(language_with_cuda):
+    test = epyccel_test(base.pass_if, lang=language_with_cuda)
     test.compare_epyccel(2)
 
-def test_pass2_if(language):
-    test = epyccel_test(base.pass2_if, lang=language)
+def test_pass2_if(language_with_cuda):
+    test = epyccel_test(base.pass2_if, lang=language_with_cuda)
     test.compare_epyccel(0.2)
     test.compare_epyccel(0.0)
 
@@ -192,15 +192,15 @@ def test_use_optional(language):
     test.compare_epyccel()
     test.compare_epyccel(6)
 
-def test_none_equality(language):
-    test = epyccel_test(base.none_equality, lang=language)
+def test_none_equality(language_with_cuda):
+    test = epyccel_test(base.none_equality, lang=language_with_cuda)
     test.compare_epyccel()
     test.compare_epyccel(6)
 
-def test_none_none_equality(language):
-    test = epyccel_test(base.none_none_equality, lang=language)
+def test_none_none_equality(language_with_cuda):
+    test = epyccel_test(base.none_none_equality, lang=language_with_cuda)
     test.compare_epyccel()
 
-def test_none_literal_equality(language):
-    test = epyccel_test(base.none_literal_equality, lang=language)
+def test_none_literal_equality(language_with_cuda):
+    test = epyccel_test(base.none_literal_equality, lang=language_with_cuda)
     test.compare_epyccel()

From bd7351493e3ae2c0947b1d2fb92605360db4de08 Mon Sep 17 00:00:00 2001
From: Said Mazouz <95222894+smazouz42@users.noreply.github.com>
Date: Wed, 15 May 2024 12:58:50 +0100
Subject: [PATCH 005/130] Fix import handling (#49)

This pull request fixes https://github.com/pyccel/pyccel-cuda/issues/48 by implementing a tiny wrapper for CUDA and by wrapping only the non-CUDA functionalities in `extern "C"`.

**Commit Summary**

-    Implemented new header printer for CUDA.
-    Added CUDA wrapper assignment
-    Instead of wrapping all local headers, wrap only C functions with extern 'C'

---------

Co-authored-by: EmilyBourne <louise.bourne@gmail.com>
Co-authored-by: bauom <40796259+bauom@users.noreply.github.com>
---
 AUTHORS                                     |  1 +
 CHANGELOG.md                                |  3 +-
 pyccel/codegen/printing/cucode.py           | 45 ++++++++----
 pyccel/codegen/python_wrapper.py            |  4 ++
 pyccel/codegen/wrapper/cuda_to_c_wrapper.py | 78 +++++++++++++++++++++
 tests/epyccel/modules/cuda_module.py        | 13 ++++
 tests/epyccel/test_epyccel_modules.py       | 13 ++++
 7 files changed, 143 insertions(+), 14 deletions(-)
 create mode 100644 pyccel/codegen/wrapper/cuda_to_c_wrapper.py
 create mode 100644 tests/epyccel/modules/cuda_module.py

diff --git a/AUTHORS b/AUTHORS
index 6c30ce5830..3dbaa2f249 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -31,3 +31,4 @@ Contributors
 * Farouk Ech-Charef
 * Mustapha Belbiad
 * Varadarajan Rengaraj
+* Said Mazouz
diff --git a/CHANGELOG.md b/CHANGELOG.md
index ce9212abc6..1d99c60127 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,7 +5,8 @@ All notable changes to this project will be documented in this file.
 
 ### Added
 
--   #32 : add support for `nvcc` Compiler and `cuda` language as a possible option.
+-   #32 : Add support for `nvcc` Compiler and `cuda` language as a possible option.
+-   #48 : Fix incorrect handling of imports in `cuda`.
 
 ## \[UNRELEASED\]
 
diff --git a/pyccel/codegen/printing/cucode.py b/pyccel/codegen/printing/cucode.py
index 86146b065b..277d2a3a6a 100644
--- a/pyccel/codegen/printing/cucode.py
+++ b/pyccel/codegen/printing/cucode.py
@@ -52,19 +52,7 @@ def _print_Module(self, expr):
 
         # Print imports last to be sure that all additional_imports have been collected
         imports = [Import(expr.name, Module(expr.name,(),())), *self._additional_imports.values()]
-        c_headers_imports = ''
-        local_imports = ''
-
-        for imp in imports:
-            if imp.source in c_library_headers:
-                c_headers_imports += self._print(imp)
-            else:
-                local_imports += self._print(imp)
-
-        imports = f'{c_headers_imports}\
-                    extern "C"{{\n\
-                    {local_imports}\
-                    }}'
+        imports = ''.join(self._print(i) for i in imports)
 
         code = f'{imports}\n\
                  {global_variables}\n\
@@ -72,3 +60,34 @@ def _print_Module(self, expr):
 
         self.exit_scope()
         return code
+
+    def _print_ModuleHeader(self, expr):
+        self.set_scope(expr.module.scope)
+        self._in_header = True
+        name = expr.module.name
+
+        funcs = ""
+        cuda_headers = ""
+        for f in expr.module.funcs:
+            if not f.is_inline:
+                if 'kernel' in f.decorators:  # Kernels are declared outside the extern "C" block
+                    cuda_headers += self.function_signature(f) + ';\n'
+                else:
+                    funcs += self.function_signature(f) + ';\n'
+        global_variables = ''.join('extern '+self._print(d) for d in expr.module.declarations if not d.variable.is_private)
+        # Print imports last to be sure that all additional_imports have been collected
+        imports = [*expr.module.imports, *self._additional_imports.values()]
+        imports = ''.join(self._print(i) for i in imports)  # NOTE(review): not emitted in the returned header - confirm intended
+
+        self._in_header = False
+        self.exit_scope()
+        function_declaration = f'{cuda_headers}\n\
+                    extern "C"{{\n\
+                    {funcs}\
+                    }}\n'
+        return '\n'.join((f"#ifndef {name.upper()}_H",
+                          f"#define {name.upper()}_H",
+                          global_variables,
+                          function_declaration,
+                          f"#endif // {name.upper()}_H\n"))
+
diff --git a/pyccel/codegen/python_wrapper.py b/pyccel/codegen/python_wrapper.py
index 9437727042..62c303fa64 100644
--- a/pyccel/codegen/python_wrapper.py
+++ b/pyccel/codegen/python_wrapper.py
@@ -13,6 +13,7 @@
 from pyccel.codegen.printing.fcode               import FCodePrinter
 from pyccel.codegen.wrapper.fortran_to_c_wrapper import FortranToCWrapper
 from pyccel.codegen.wrapper.c_to_python_wrapper  import CToPythonWrapper
+from pyccel.codegen.wrapper.cuda_to_c_wrapper    import CudaToCWrapper
 from pyccel.codegen.utilities                    import recompile_object
 from pyccel.codegen.utilities                    import copy_internal_library
 from pyccel.codegen.utilities                    import internal_libs
@@ -144,6 +145,9 @@ def create_shared_library(codegen,
                 verbose=verbose)
         timings['Bind C wrapping'] = time.time() - start_bind_c_compiling
         c_ast = bind_c_mod
+    elif language == 'cuda':
+        wrapper = CudaToCWrapper()
+        c_ast = wrapper.wrap(codegen.ast)
     else:
         c_ast = codegen.ast
 
diff --git a/pyccel/codegen/wrapper/cuda_to_c_wrapper.py b/pyccel/codegen/wrapper/cuda_to_c_wrapper.py
new file mode 100644
index 0000000000..c0e24c7c09
--- /dev/null
+++ b/pyccel/codegen/wrapper/cuda_to_c_wrapper.py
@@ -0,0 +1,78 @@
+# coding: utf-8
+#------------------------------------------------------------------------------------------#
+# This file is part of Pyccel which is released under MIT License. See the LICENSE file or #
+# go to https://github.com/pyccel/pyccel/blob/master/LICENSE for full license details.     #
+#------------------------------------------------------------------------------------------#
+"""
+Module describing the code-wrapping class : CudaToCWrapper
+which creates an interface exposing Cuda code to C.
+"""
+
+from pyccel.ast.bind_c      import BindCModule
+from pyccel.errors.errors   import Errors
+from pyccel.ast.bind_c      import BindCVariable
+from .wrapper               import Wrapper
+
+errors = Errors()
+
+class CudaToCWrapper(Wrapper):
+    """
+    Class for creating a wrapper exposing Cuda code to C.
+
+    While CUDA is typically compatible with C by default,
+    this wrapper becomes necessary in scenarios where specific adaptations
+    or modifications are required to ensure seamless integration with C.
+    """
+
+    def _wrap_Module(self, expr):
+        """
+        Create a Module which is compatible with C.
+
+        Create a Module which provides an interface between C and the
+        Module described by expr.
+
+        Parameters
+        ----------
+        expr : pyccel.ast.core.Module
+            The module to be wrapped.
+
+        Returns
+        -------
+        pyccel.ast.bind_c.BindCModule
+            The C-compatible module.
+        """
+        init_func = expr.init_func
+        if expr.interfaces:
+            errors.report("Interface wrapping is not yet supported for Cuda",
+                      severity='warning', symbol=expr)
+        if expr.classes:
+            errors.report("Class wrapping is not yet supported for Cuda",
+                      severity='warning', symbol=expr)
+
+        variables = [self._wrap(v) for v in expr.variables]
+
+        return BindCModule(expr.name, variables, expr.funcs,
+                init_func=init_func,
+                scope = expr.scope,
+                original_module=expr)
+
+    def _wrap_Variable(self, expr):
+        """
+        Create all objects necessary to expose a module variable to C.
+
+        Create and return the objects which must be printed in the wrapping
+        module in order to expose the variable to C.
+
+        Parameters
+        ----------
+        expr : pyccel.ast.variables.Variable
+            The module variable.
+
+        Returns
+        -------
+        pyccel.ast.bind_c.BindCVariable
+            The C-compatible variable which must be printed in
+            the wrapping module to expose the variable.
+        """
+        return expr.clone(expr.name, new_class = BindCVariable)
+
diff --git a/tests/epyccel/modules/cuda_module.py b/tests/epyccel/modules/cuda_module.py
new file mode 100644
index 0000000000..bb7ae6b98a
--- /dev/null
+++ b/tests/epyccel/modules/cuda_module.py
@@ -0,0 +1,13 @@
+# pylint: disable=missing-function-docstring, missing-module-docstring
+import numpy as np
+
+g = np.float64(9.81)
+r0 = np.float32(1.0)
+rmin = 0.01
+rmax = 1.0
+
+skip_centre = True
+
+method = 3
+
+tiny = np.int32(4)
diff --git a/tests/epyccel/test_epyccel_modules.py b/tests/epyccel/test_epyccel_modules.py
index ad8ae0bd75..223f741bf0 100644
--- a/tests/epyccel/test_epyccel_modules.py
+++ b/tests/epyccel/test_epyccel_modules.py
@@ -200,3 +200,16 @@ def test_awkward_names(language):
     assert mod.function() == modnew.function()
     assert mod.pure() == modnew.pure()
     assert mod.allocate(1) == modnew.allocate(1)
+
+def test_cuda_module(language_with_cuda):
+    import modules.cuda_module as mod
+
+    modnew = epyccel(mod, language=language_with_cuda)
+
+    atts = ('g', 'r0', 'rmin', 'rmax', 'skip_centre',
+            'method', 'tiny')
+    for att in atts:
+        mod_att = getattr(mod, att)
+        modnew_att = getattr(modnew, att)
+        assert mod_att == modnew_att
+        assert type(mod_att) is type(modnew_att)

From 261c152638e54caae3966e54985725a7fca505ba Mon Sep 17 00:00:00 2001
From: Said Mazouz <95222894+smazouz42@users.noreply.github.com>
Date: Thu, 27 Jun 2024 20:31:46 +0100
Subject: [PATCH 006/130] Add support for kernels (#42)

This pull request addresses issue #28 by implementing a new feature in
Pyccel that allows users to define custom GPU kernels. The syntax for
creating these kernels is inspired by Numba. Issue #45 also needed to be
fixed for testing purposes.

**Commit Summary**

- Introduced KernelCall class
- Added cuda printer methods _print_KernelCall and _print_FunctionDef to
generate the corresponding CUDA representation for both kernel calls and
definitions
- Added IndexedFunctionCall, which represents an indexed function call
- Added CUDA module and cuda.synchronize()
- Fixed a bug found in the header printer: it did not import the
headers required by the functions used

---------

Co-authored-by: EmilyBourne <louise.bourne@gmail.com>
Co-authored-by: bauom <40796259+bauom@users.noreply.github.com>
Co-authored-by: Emily Bourne <emily.bourne@epfl.ch>
---
 .dict_custom.txt                              |   1 +
 CHANGELOG.md                                  |   2 +
 docs/cuda.md                                  |  23 +++
 pyccel/ast/core.py                            |  37 ++++
 pyccel/ast/cuda.py                            |  65 +++++++
 pyccel/ast/cudaext.py                         |  42 +++++
 pyccel/ast/utilities.py                       |   4 +-
 pyccel/codegen/printing/cucode.py             |  46 ++++-
 pyccel/cuda/__init__.py                       |  10 +
 pyccel/cuda/cuda_sync_primitives.py           |  16 ++
 pyccel/decorators.py                          |  32 ++++
 pyccel/errors/messages.py                     |   8 +
 pyccel/parser/semantic.py                     |  84 ++++++++-
 pyccel/parser/syntactic.py                    |   4 +
 tests/conftest.py                             |   9 +
 tests/cuda/test_kernel_semantic.py            | 176 ++++++++++++++++++
 tests/pyccel/scripts/kernel/hello_kernel.py   |  19 ++
 .../scripts/kernel/kernel_name_collision.py   |   8 +
 tests/pyccel/test_pyccel.py                   |  22 ++-
 19 files changed, 599 insertions(+), 9 deletions(-)
 create mode 100644 docs/cuda.md
 create mode 100644 pyccel/ast/cuda.py
 create mode 100644 pyccel/ast/cudaext.py
 create mode 100644 pyccel/cuda/__init__.py
 create mode 100644 pyccel/cuda/cuda_sync_primitives.py
 create mode 100644 tests/cuda/test_kernel_semantic.py
 create mode 100644 tests/pyccel/scripts/kernel/hello_kernel.py
 create mode 100644 tests/pyccel/scripts/kernel/kernel_name_collision.py

diff --git a/.dict_custom.txt b/.dict_custom.txt
index ae99f31ed4..5d99e21194 100644
--- a/.dict_custom.txt
+++ b/.dict_custom.txt
@@ -118,3 +118,4 @@ datatyping
 datatypes
 indexable
 traceback
+GPUs
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 1d99c60127..7c1dcffc55 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,8 @@ All notable changes to this project will be documented in this file.
 
 -   #32 : Add support for `nvcc` Compiler and `cuda` language as a possible option.
 -   #48 : Fix incorrect handling of imports in `cuda`.
+-   #42 : Add support for custom kernels in `cuda`.
+-   #42 : Add Cuda module to Pyccel. Add support for `cuda.synchronize` function.
 
 ## \[UNRELEASED\]
 
diff --git a/docs/cuda.md b/docs/cuda.md
new file mode 100644
index 0000000000..de30d52b80
--- /dev/null
+++ b/docs/cuda.md
@@ -0,0 +1,23 @@
+# Getting started with GPUs
+
+Pyccel now supports NVIDIA CUDA, empowering users to accelerate numerical computations on GPUs seamlessly. With Pyccel's high-level syntax and automatic code generation, harnessing the power of CUDA becomes effortless. This documentation provides a quick guide to enabling CUDA in Pyccel.
+
+## Cuda Decorator
+
+### kernel
+
+The kernel decorator allows the user to declare a CUDA kernel. The kernel can be defined in Python, and the syntax is similar to that of Numba.
+
+```python
+from pyccel.decorators import kernel
+
+@kernel
+def my_kernel():
+    pass
+
+blockspergrid = 1
+threadsperblock = 1
+# Call your kernel function
+my_kernel[blockspergrid, threadsperblock]()
+
+```
\ No newline at end of file
diff --git a/pyccel/ast/core.py b/pyccel/ast/core.py
index 013f206dd6..f0e5cc67f1 100644
--- a/pyccel/ast/core.py
+++ b/pyccel/ast/core.py
@@ -73,6 +73,7 @@
     'If',
     'IfSection',
     'Import',
+    'IndexedFunctionCall',
     'InProgram',
     'InlineFunctionDef',
     'Interface',
@@ -2065,6 +2066,42 @@ def _ignore(cls, c):
         """
         return c is None or isinstance(c, (FunctionDef, *cls._ignored_types))
 
+class IndexedFunctionCall(FunctionCall):
+    """
+    Represents an indexed function call in the code.
+
+    Class representing indexed function calls, encapsulating all
+    relevant information for such calls within the code base.
+
+    Parameters
+    ----------
+    func : FunctionDef
+        The function being called.
+
+    args : iterable of FunctionCallArgument
+        The arguments passed to the function.
+
+    indexes : iterable of TypedAstNode
+        The indexes of the function call.
+
+    current_function : FunctionDef, optional
+        The function where the call takes place.
+    """
+    __slots__ = ('_indexes',)
+    _attribute_nodes = FunctionCall._attribute_nodes + ('_indexes',)
+    def __init__(self, func, args, indexes, current_function = None):
+        self._indexes = indexes
+        super().__init__(func, args, current_function)
+
+    @property
+    def indexes(self):
+        """
+        Indexes of the function call.
+
+        Represents the indexes of the function call.
+        """
+        return self._indexes
+
 class ConstructorCall(FunctionCall):
 
     """
diff --git a/pyccel/ast/cuda.py b/pyccel/ast/cuda.py
new file mode 100644
index 0000000000..f1e50ef7f0
--- /dev/null
+++ b/pyccel/ast/cuda.py
@@ -0,0 +1,65 @@
+# -*- coding: utf-8 -*-
+#------------------------------------------------------------------------------------------#
+# This file is part of Pyccel which is released under MIT License. See the LICENSE file or #
+# go to https://github.com/pyccel/pyccel/blob/master/LICENSE for full license details.     #
+#------------------------------------------------------------------------------------------#
+"""
+CUDA Module
+This module provides a collection of classes and utilities for CUDA programming.
+"""
+from pyccel.ast.core import FunctionCall
+
+__all__ = (
+    'KernelCall',
+)
+
+class KernelCall(FunctionCall):
+    """
+    Represents a kernel function call in the code.
+
+    Node describing a call to a kernel function together with its
+    launch configuration (number of blocks and threads per block).
+
+    Parameters
+    ----------
+    func : FunctionDef
+        The definition of the function being called.
+
+    args : iterable of FunctionCallArgument
+        The arguments passed to the function.
+
+    num_blocks : TypedAstNode
+        The number of blocks. This object must have a primitive type of `PrimitiveIntegerType`.
+
+    tp_block : TypedAstNode
+        The number of threads per block. This object must have a primitive type of `PrimitiveIntegerType`.
+
+    current_function : FunctionDef, optional
+        The function where the call takes place.
+    """
+    __slots__ = ('_num_blocks','_tp_block')
+    _attribute_nodes = (*FunctionCall._attribute_nodes, '_num_blocks', '_tp_block')
+
+    def __init__(self, func, args, num_blocks, tp_block, current_function = None):
+        self._num_blocks = num_blocks
+        self._tp_block = tp_block
+        super().__init__(func, args, current_function)
+
+    @property
+    def num_blocks(self):
+        """
+        The number of blocks in the kernel being called.
+
+        The object passed as `num_blocks` when the node was created.
+        """
+        return self._num_blocks
+
+    @property
+    def tp_block(self):
+        """
+        The number of threads per block.
+
+        The object passed as `tp_block` when the node was created.
+        """
+        return self._tp_block
+
diff --git a/pyccel/ast/cudaext.py b/pyccel/ast/cudaext.py
new file mode 100644
index 0000000000..b540f20993
--- /dev/null
+++ b/pyccel/ast/cudaext.py
@@ -0,0 +1,42 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+#------------------------------------------------------------------------------------------#
+# This file is part of Pyccel which is released under MIT License. See the LICENSE file or #
+# go to https://github.com/pyccel/pyccel/blob/master/LICENSE for full license details.     #
+#------------------------------------------------------------------------------------------#
+"""
+CUDA Extension Module
+Provides CUDA functionality for code generation.
+"""
+from .internals      import PyccelFunction
+
+from .datatypes      import VoidType
+from .core           import Module, PyccelFunctionDef
+
+__all__ = (
+    'CudaSynchronize',
+)
+
+class CudaSynchronize(PyccelFunction):
+    """
+    Represents a call to Cuda.synchronize for code generation.
+
+    This class serves as a representation of the Cuda.synchronize method.
+    """
+    __slots__ = ()
+    _attribute_nodes = ()
+    _shape     = None
+    _class_type = VoidType()
+    def __init__(self):
+        super().__init__()
+
+cuda_funcs = {
+    'synchronize'       : PyccelFunctionDef('synchronize' , CudaSynchronize),
+}
+
+cuda_mod = Module('cuda',
+    variables=[],
+    funcs=cuda_funcs.values(),
+    imports=[]
+)
+
diff --git a/pyccel/ast/utilities.py b/pyccel/ast/utilities.py
index 1e6c0422ab..e5cd77b168 100644
--- a/pyccel/ast/utilities.py
+++ b/pyccel/ast/utilities.py
@@ -25,6 +25,7 @@
 from .literals      import LiteralInteger, LiteralEllipsis, Nil
 from .mathext       import math_mod
 from .sysext        import sys_mod
+from .cudaext       import cuda_mod
 
 from .numpyext      import (NumpyEmpty, NumpyArray, numpy_mod,
                             NumpyTranspose, NumpyLinspace)
@@ -49,7 +50,8 @@
 decorators_mod = Module('decorators',(),
         funcs = [PyccelFunctionDef(d, PyccelFunction) for d in pyccel_decorators.__all__])
 pyccel_mod = Module('pyccel',(),(),
-        imports = [Import('decorators', decorators_mod)])
+        imports = [Import('decorators', decorators_mod),
+                    Import('cuda', cuda_mod)])
 
 # TODO add documentation
 builtin_import_registry = Module('__main__',
diff --git a/pyccel/codegen/printing/cucode.py b/pyccel/codegen/printing/cucode.py
index 277d2a3a6a..cd26843017 100644
--- a/pyccel/codegen/printing/cucode.py
+++ b/pyccel/codegen/printing/cucode.py
@@ -9,11 +9,12 @@
 enabling the direct translation of high-level Pyccel expressions into CUDA code.
 """
 
-from pyccel.codegen.printing.ccode import CCodePrinter, c_library_headers
+from pyccel.codegen.printing.ccode  import CCodePrinter
 
-from pyccel.ast.core        import Import, Module
+from pyccel.ast.core                import Import, Module
+from pyccel.ast.literals            import Nil
 
-from pyccel.errors.errors   import Errors
+from pyccel.errors.errors           import Errors
 
 
 errors = Errors()
@@ -61,6 +62,44 @@ def _print_Module(self, expr):
         self.exit_scope()
         return code
 
+    def function_signature(self, expr, print_arg_names = True):
+        """
+        Get the Cuda representation of the function signature.
+
+        Extract from the function definition `expr` all the
+        information (name, input, output) needed to create the
+        function signature and return a string describing the
+        function. Kernels are prefixed with `__global__`.
+        This is not a declaration as the signature does not end
+        with a semi-colon.
+
+        Parameters
+        ----------
+        expr : FunctionDef
+            The function definition for which a signature is needed.
+
+        print_arg_names : bool, default : True
+            Indicates whether argument names should be printed.
+
+        Returns
+        -------
+        str
+            Signature of the function.
+        """
+        cuda_decorator = '__global__ ' if 'kernel' in expr.decorators else ''
+        c_function_signature = super().function_signature(expr, print_arg_names)
+        return f'{cuda_decorator}{c_function_signature}'
+
+    def _print_KernelCall(self, expr):
+        # An argument without a value is printed as Nil
+        call_args = ', '.join(self._print(arg.value or Nil()) for arg in expr.args)
+        launch_config = f"{expr.num_blocks}, {expr.tp_block}"
+
+        return f"{expr.funcdef.name}<<<{launch_config}>>>({call_args});\n"
+
+    def _print_CudaSynchronize(self, expr):
+        return 'cudaDeviceSynchronize();\n'
+
     def _print_ModuleHeader(self, expr):
         self.set_scope(expr.module.scope)
         self._in_header = True
@@ -87,6 +126,7 @@ def _print_ModuleHeader(self, expr):
                     }}\n'
         return '\n'.join((f"#ifndef {name.upper()}_H",
                           f"#define {name.upper()}_H",
+                          imports,
                           global_variables,
                           function_declaration,
                           "#endif // {name.upper()}_H\n"))
diff --git a/pyccel/cuda/__init__.py b/pyccel/cuda/__init__.py
new file mode 100644
index 0000000000..e8542ad5d5
--- /dev/null
+++ b/pyccel/cuda/__init__.py
@@ -0,0 +1,10 @@
+#------------------------------------------------------------------------------------------#
+# This file is part of Pyccel which is released under MIT License. See the LICENSE file or #
+# go to https://github.com/pyccel/pyccel/blob/master/LICENSE for full license details.     #
+#------------------------------------------------------------------------------------------#
+"""
+    This module is for exposing the CudaSubmodule functions.
+"""
+from .cuda_sync_primitives    import synchronize
+
+__all__ = ['synchronize']
diff --git a/pyccel/cuda/cuda_sync_primitives.py b/pyccel/cuda/cuda_sync_primitives.py
new file mode 100644
index 0000000000..f3442fe9e2
--- /dev/null
+++ b/pyccel/cuda/cuda_sync_primitives.py
@@ -0,0 +1,16 @@
+#------------------------------------------------------------------------------------------#
+# This file is part of Pyccel which is released under MIT License. See the LICENSE file or #
+# go to https://github.com/pyccel/pyccel/blob/master/LICENSE for full license details.     #
+#------------------------------------------------------------------------------------------#
+"""
+This submodule contains CUDA methods for Pyccel.
+"""
+
+
+def synchronize():
+    """
+    Synchronize CUDA device execution.
+
+    Synchronize CUDA device execution.
+    """
+
diff --git a/pyccel/decorators.py b/pyccel/decorators.py
index 1f640043db..77717a991f 100644
--- a/pyccel/decorators.py
+++ b/pyccel/decorators.py
@@ -19,6 +19,7 @@
     'sympy',
     'template',
     'types',
+    'kernel'
 )
 
 
@@ -109,3 +110,34 @@ def allow_negative_index(f,*args):
     def identity(f):
         return f
     return identity
+
+def kernel(f):
+    """
+    Decorator for marking a Python function as a kernel.
+
+    This function serves as a decorator to mark a Python function
+    as a kernel function, typically used for GPU computations.
+    This allows the function to be indexed with the number of blocks and threads.
+
+    Parameters
+    ----------
+    f : function
+        The function to which the decorator is applied.
+
+    Returns
+    -------
+    KernelAccessor
+        An object which returns the kernel function when it is indexed.
+    """
+    class KernelAccessor:
+        """
+        Class representing the kernel function.
+
+        Indexing an instance returns the wrapped function; the index is ignored.
+        """
+        def __init__(self, f):
+            self._f = f
+        def __getitem__(self, args):
+            return self._f
+
+    return KernelAccessor(f)
diff --git a/pyccel/errors/messages.py b/pyccel/errors/messages.py
index 79eccc1df2..09966d810c 100644
--- a/pyccel/errors/messages.py
+++ b/pyccel/errors/messages.py
@@ -162,3 +162,11 @@
 WRONG_LINSPACE_ENDPOINT = 'endpoint argument must be boolean'
 NON_LITERAL_KEEP_DIMS = 'keep_dims argument must be a literal, otherwise rank is unknown'
 NON_LITERAL_AXIS = 'axis argument must be a literal, otherwise pyccel cannot determine which dimension to operate on'
+MISSING_KERNEL_CONFIGURATION = 'Kernel launch configuration not specified'
+INVALID_KERNEL_LAUNCH_CONFIG = 'Expected exactly 2 parameters for kernel launch'
+INVALID_KERNEL_CALL_BP_GRID = 'Invalid Block per grid parameter for Kernel call'
+INVALID_KERNEL_CALL_TP_BLOCK = 'Invalid Thread per Block parameter for Kernel call'
+
+
+
+
diff --git a/pyccel/parser/semantic.py b/pyccel/parser/semantic.py
index e94b9c8413..fde10d6317 100644
--- a/pyccel/parser/semantic.py
+++ b/pyccel/parser/semantic.py
@@ -116,6 +116,8 @@
 from pyccel.ast.variable import IndexedElement, AnnotatedPyccelSymbol
 from pyccel.ast.variable import DottedName, DottedVariable
 
+from pyccel.ast.cuda import     KernelCall
+
 from pyccel.errors.errors import Errors
 from pyccel.errors.errors import PyccelSemanticError
 
@@ -133,7 +135,9 @@
         PYCCEL_RESTRICTION_LIST_COMPREHENSION_LIMITS, PYCCEL_RESTRICTION_LIST_COMPREHENSION_SIZE,
         UNUSED_DECORATORS, UNSUPPORTED_POINTER_RETURN_VALUE, PYCCEL_RESTRICTION_OPTIONAL_NONE,
         PYCCEL_RESTRICTION_PRIMITIVE_IMMUTABLE, PYCCEL_RESTRICTION_IS_ISNOT,
-        FOUND_DUPLICATED_IMPORT, UNDEFINED_WITH_ACCESS, MACRO_MISSING_HEADER_OR_FUNC)
+        FOUND_DUPLICATED_IMPORT, UNDEFINED_WITH_ACCESS, MACRO_MISSING_HEADER_OR_FUNC, PYCCEL_RESTRICTION_INHOMOG_SET,
+        MISSING_KERNEL_CONFIGURATION,
+        INVALID_KERNEL_LAUNCH_CONFIG, INVALID_KERNEL_CALL_BP_GRID, INVALID_KERNEL_CALL_TP_BLOCK)
 
 from pyccel.parser.base      import BasicParser
 from pyccel.parser.syntactic import SyntaxParser
@@ -1139,6 +1143,67 @@ def _handle_function(self, expr, func, args, *, is_method = False, use_build_fun
 
             return new_expr
 
+    def _handle_kernel(self, expr, func, args):
+        """
+        Create the node representing the kernel function call.
+
+        Check that the launch configuration and the arguments of the
+        indexed call are valid, then create the KernelCall node. Each
+        violation is reported as a fatal error.
+
+        Parameters
+        ----------
+        expr : IndexedFunctionCall
+               Node has all the information about the function call.
+
+        func : FunctionDef | Interface | PyccelInternalFunction type
+               The function being called.
+
+        args : iterable of FunctionCallArgument
+               The arguments passed to the function.
+
+        Returns
+        -------
+        Pyccel.ast.cuda.KernelCall
+            The semantic representation of the kernel call.
+        """
+        if len(expr.indexes) != 2:
+            errors.report(INVALID_KERNEL_LAUNCH_CONFIG,
+                    symbol=expr,
+                    severity='fatal')
+
+        if len(func.results):
+            errors.report(f"cuda kernel function '{func.name}' returned a value in violation of the laid-down specification",
+                         symbol=expr,
+                         severity='fatal')
+
+        if isinstance(func, FunctionDef) and len(args) != len(func.arguments):
+            errors.report(f"{len(args)} argument types given, but function takes {len(func.arguments)} arguments",
+                symbol=expr,
+                severity='fatal')
+
+        # Both launch parameters are subject to the same rule: they must be
+        # either an integer literal or a variable with a native integer type.
+        # They are checked in order so the first invalid one is reported.
+        launch_config = zip(expr.indexes,
+                            (INVALID_KERNEL_CALL_BP_GRID,
+                             INVALID_KERNEL_CALL_TP_BLOCK))
+
+        for index, message in launch_config:
+            if isinstance(index, LiteralInteger):
+                continue
+
+            # The variable is only looked up when the index is a symbol
+            is_native_int = isinstance(index, PyccelSymbol) and \
+                    isinstance(self.get_variable(index).dtype, PythonNativeInt)
+
+            if not is_native_int:
+                errors.report(message,
+                        symbol = expr,
+                        severity='fatal')
+
+        return KernelCall(func, args, expr.indexes[0], expr.indexes[1])
+
     def _sort_function_call_args(self, func_args, args):
         """
         Sort and add the missing call arguments to match the arguments in the function definition.
@@ -2815,6 +2880,23 @@ def _visit_Lambda(self, expr):
                 expr = Lambda(tuple(expr.variables), expr_new)
         return expr
 
+    def _visit_IndexedFunctionCall(self, expr):
+        name = self.scope.get_expected_name(expr.funcdef)
+        func = self.scope.find(name, 'functions')
+        args = self._handle_function_args(expr.args)
+
+        if func is None:
+            return errors.report(UNDEFINED_FUNCTION, symbol=expr.funcdef,
+                    bounding_box=(self.current_ast_node.lineno, self.current_ast_node.col_offset),
+                    severity='fatal')
+
+        func = self._annotate_the_called_function_def(func)
+        # Kernels are the only functions which can be indexed before being called
+        if 'kernel' not in func.decorators:
+            return errors.report("Unknown function type",
+                symbol=expr, severity='fatal')
+        return self._handle_kernel(expr, func, args)
+
     def _visit_FunctionCall(self, expr):
         name     = expr.funcdef
         try:
diff --git a/pyccel/parser/syntactic.py b/pyccel/parser/syntactic.py
index 2967f4999b..3af7f0728a 100644
--- a/pyccel/parser/syntactic.py
+++ b/pyccel/parser/syntactic.py
@@ -64,6 +64,8 @@
 
 from pyccel.ast.type_annotations import SyntacticTypeAnnotation, UnionTypeAnnotation
 
+from pyccel.ast.core import IndexedFunctionCall
+
 from pyccel.parser.base        import BasicParser
 from pyccel.parser.extend_tree import extend_tree
 from pyccel.parser.utilities   import get_default_path
@@ -1102,6 +1104,8 @@ def _visit_Call(self, stmt):
         elif isinstance(func, DottedName):
             func_attr = FunctionCall(func.name[-1], args)
             func = DottedName(*func.name[:-1], func_attr)
+        elif isinstance(func,IndexedElement):
+            func = IndexedFunctionCall(func.base, args, func.indices)
         else:
             raise NotImplementedError(f' Unknown function type {type(func)}')
 
diff --git a/tests/conftest.py b/tests/conftest.py
index a5082ef6e8..4e74d1ec7a 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -59,6 +59,15 @@ def pytest_runtest_teardown(item, nextitem):
 
 def pytest_addoption(parser):
     parser.addoption("--developer-mode", action="store_true", default=github_debugging, help="Show tracebacks when pyccel errors are raised")
+    parser.addoption("--gpu_available", action="store_true",
+                default=False, help="enable GPU tests")
+
+def pytest_generate_tests(metafunc):
+    """Parametrise the `gpu_available` fixture from the `--gpu_available` flag."""
+    if "gpu_available" not in metafunc.fixturenames:
+        return
+    gpu_available = bool(metafunc.config.getoption("gpu_available"))
+    metafunc.parametrize("gpu_available", [gpu_available])
 
 def pytest_sessionstart(session):
     # setup_stuff
diff --git a/tests/cuda/test_kernel_semantic.py b/tests/cuda/test_kernel_semantic.py
new file mode 100644
index 0000000000..00b74c3bea
--- /dev/null
+++ b/tests/cuda/test_kernel_semantic.py
@@ -0,0 +1,176 @@
+# pylint: disable=missing-function-docstring, missing-module-docstring
+import pytest
+
+from pyccel import epyccel
+from pyccel.decorators import kernel
+from pyccel.errors.errors import Errors, PyccelSemanticError
+from pyccel.errors.messages import (INVALID_KERNEL_CALL_TP_BLOCK,
+                                    INVALID_KERNEL_CALL_BP_GRID,
+                                    INVALID_KERNEL_LAUNCH_CONFIG)
+
+
+@pytest.mark.cuda
+def test_invalid_block_number():
+    def invalid_block_number():
+        @kernel
+        def kernel_call():
+            pass
+
+        blocks_per_grid = 1.0
+        threads_per_block = 1
+        kernel_call[blocks_per_grid, threads_per_block]()
+
+    errors = Errors()
+
+    with pytest.raises(PyccelSemanticError):
+        epyccel(invalid_block_number, language="cuda")
+
+    assert errors.has_errors()
+
+    assert errors.num_messages() == 1
+
+    error_info = [*errors.error_info_map.values()][0][0]
+    assert error_info.symbol.funcdef == 'kernel_call'
+    assert INVALID_KERNEL_CALL_BP_GRID == error_info.message
+
+
+@pytest.mark.cuda
+def test_invalid_thread_per_block():
+    def invalid_thread_per_block():
+        @kernel
+        def kernel_call():
+            pass
+
+        blocks_per_grid = 1
+        threads_per_block = 1.0
+        kernel_call[blocks_per_grid, threads_per_block]()
+
+    errors = Errors()
+
+    with pytest.raises(PyccelSemanticError):
+        epyccel(invalid_thread_per_block, language="cuda")
+    assert errors.has_errors()
+    assert errors.num_messages() == 1
+    error_info = [*errors.error_info_map.values()][0][0]
+    assert error_info.symbol.funcdef == 'kernel_call'
+    assert INVALID_KERNEL_CALL_TP_BLOCK == error_info.message
+
+
+@pytest.mark.cuda
+def test_invalid_launch_config_high():
+    def invalid_launch_config_high():
+        @kernel
+        def kernel_call():
+            pass
+
+        blocks_per_grid = 1
+        threads_per_block = 1
+        third_param = 1
+        kernel_call[blocks_per_grid, threads_per_block, third_param]()
+
+    errors = Errors()
+
+    with pytest.raises(PyccelSemanticError):
+        epyccel(invalid_launch_config_high, language="cuda")
+
+    assert errors.has_errors()
+    assert errors.num_messages() == 1
+
+    error_info = [*errors.error_info_map.values()][0][0]
+    assert error_info.symbol.funcdef == 'kernel_call'
+    assert INVALID_KERNEL_LAUNCH_CONFIG == error_info.message
+
+
+@pytest.mark.cuda
+def test_invalid_launch_config_low():
+    def invalid_launch_config_low():
+        @kernel
+        def kernel_call():
+            pass
+
+        blocks_per_grid = 1
+        kernel_call[blocks_per_grid]()
+
+    errors = Errors()
+
+    with pytest.raises(PyccelSemanticError):
+        epyccel(invalid_launch_config_low, language="cuda")
+
+    assert errors.has_errors()
+    assert errors.num_messages() == 1
+
+    error_info = [*errors.error_info_map.values()][0][0]
+    assert error_info.symbol.funcdef == 'kernel_call'
+    assert INVALID_KERNEL_LAUNCH_CONFIG == error_info.message
+
+
+@pytest.mark.cuda
+def test_invalid_arguments_for_kernel_call():
+    def invalid_arguments():
+        @kernel
+        def kernel_call(arg : int):
+            pass
+
+        blocks_per_grid = 1
+        threads_per_block = 1
+        kernel_call[blocks_per_grid, threads_per_block]()
+
+    errors = Errors()
+
+    with pytest.raises(PyccelSemanticError):
+        epyccel(invalid_arguments, language="cuda")
+
+    assert errors.has_errors()
+    assert errors.num_messages() == 1
+
+    error_info = [*errors.error_info_map.values()][0][0]
+    assert error_info.symbol.funcdef == 'kernel_call'
+    assert "0 argument types given, but function takes 1 arguments" == error_info.message
+
+
+@pytest.mark.cuda
+def test_invalid_arguments_for_kernel_call_2():
+    def invalid_arguments_():
+        @kernel
+        def kernel_call():
+            pass
+
+        blocks_per_grid = 1
+        threads_per_block = 1
+        kernel_call[blocks_per_grid, threads_per_block](1)
+
+    errors = Errors()
+
+    with pytest.raises(PyccelSemanticError):
+        epyccel(invalid_arguments_, language="cuda")
+
+    assert errors.has_errors()
+    assert errors.num_messages() == 1
+
+    error_info = [*errors.error_info_map.values()][0][0]
+    assert error_info.symbol.funcdef == 'kernel_call'
+    assert "1 argument types given, but function takes 0 arguments" == error_info.message
+
+
+@pytest.mark.cuda
+def test_kernel_return():
+    def kernel_return():
+        @kernel
+        def kernel_call():
+            return 7
+
+        blocks_per_grid = 1
+        threads_per_block = 1
+        kernel_call[blocks_per_grid, threads_per_block]()
+
+    errors = Errors()
+
+    with pytest.raises(PyccelSemanticError):
+        epyccel(kernel_return, language="cuda")
+
+    assert errors.has_errors()
+    assert errors.num_messages() == 1
+
+    error_info = [*errors.error_info_map.values()][0][0]
+    assert error_info.symbol.funcdef == 'kernel_call'
+    assert "cuda kernel function 'kernel_call' returned a value in violation of the laid-down specification" == error_info.message
diff --git a/tests/pyccel/scripts/kernel/hello_kernel.py b/tests/pyccel/scripts/kernel/hello_kernel.py
new file mode 100644
index 0000000000..b6901b25a1
--- /dev/null
+++ b/tests/pyccel/scripts/kernel/hello_kernel.py
@@ -0,0 +1,19 @@
+# pylint: disable=missing-function-docstring, missing-module-docstring
+from pyccel.decorators import kernel
+from pyccel import cuda
+
+@kernel
+def say_hello(its_morning : bool):
+    if(its_morning):
+        print("Hello and Good morning")
+    else:
+        print("Hello and Good afternoon")
+
+def f():
+    its_morning = True
+    say_hello[1,1](its_morning)
+    cuda.synchronize()
+
+if __name__ == '__main__':
+    f()
+
diff --git a/tests/pyccel/scripts/kernel/kernel_name_collision.py b/tests/pyccel/scripts/kernel/kernel_name_collision.py
new file mode 100644
index 0000000000..ac7abe25ae
--- /dev/null
+++ b/tests/pyccel/scripts/kernel/kernel_name_collision.py
@@ -0,0 +1,8 @@
+# pylint: disable=missing-function-docstring, missing-module-docstring
+from pyccel.decorators import kernel
+
+@kernel
+def do():
+    pass
+
+do[1,1]()
diff --git a/tests/pyccel/test_pyccel.py b/tests/pyccel/test_pyccel.py
index ec1e846549..b4757a3c31 100644
--- a/tests/pyccel/test_pyccel.py
+++ b/tests/pyccel/test_pyccel.py
@@ -294,7 +294,7 @@ def compare_pyth_fort_output( p_output, f_output, dtype=float, language=None):
 #------------------------------------------------------------------------------
 def pyccel_test(test_file, dependencies = None, compile_with_pyccel = True,
         cwd = None, pyccel_commands = "", output_dtype = float,
-        language = None, output_dir = None):
+        language = None, output_dir = None, execute_code = True):
     """
     Run pyccel and compare the output to ensure that the results
     are equivalent
@@ -394,13 +394,14 @@ def pyccel_test(test_file, dependencies = None, compile_with_pyccel = True,
             compile_fortran(cwd, output_test_file, dependencies)
         elif language == 'c':
             compile_c(cwd, output_test_file, dependencies)
-
-    lang_out = get_lang_output(output_test_file, language)
-    compare_pyth_fort_output(pyth_out, lang_out, output_dtype, language)
+    if  execute_code:
+        lang_out = get_lang_output(output_test_file, language)
+        compare_pyth_fort_output(pyth_out, lang_out, output_dtype, language)
 
 #==============================================================================
 # UNIT TESTS
 #==============================================================================
+
 def test_relative_imports_in_project(language):
 
     base_dir = os.path.dirname(os.path.realpath(__file__))
@@ -728,6 +729,19 @@ def test_multiple_results(language):
 def test_elemental(language):
     pyccel_test("scripts/decorators_elemental.py", language = language)
 
+#------------------------------------------------------------------------------
+@pytest.mark.cuda
+def test_hello_kernel(gpu_available):
+    types = str
+    pyccel_test("scripts/kernel/hello_kernel.py",
+            language="cuda", output_dtype=types , execute_code=gpu_available)
+
+#------------------------------------------------------------------------------
+@pytest.mark.cuda
+def test_kernel_collision(gpu_available):
+    pyccel_test("scripts/kernel/kernel_name_collision.py",
+            language="cuda", execute_code=gpu_available)
+
 #------------------------------------------------------------------------------
 def test_print_strings(language):
     types = str

From 80f905bed09e70bcbded0c27e0632b50fc1e1a06 Mon Sep 17 00:00:00 2001
From: smazouz42 <smazouz@student.1337.ma>
Date: Tue, 2 Jul 2024 15:37:10 +0100
Subject: [PATCH 007/130] Begin implementation of CUDA arrays: adding cudaempty
 and cudafull functions, and refining CUDA type handling

---
 pyccel/ast/cudaext.py     | 100 ++++++++++++++++++++++++++++++++++++++
 pyccel/ast/cudatypes.py   |  34 +++++++++++++
 pyccel/ast/numpyext.py    |   9 +++-
 pyccel/ast/test.cu        |  46 ++++++++++++++++++
 pyccel/ast/test.py        |   4 ++
 pyccel/parser/semantic.py |   5 +-
 6 files changed, 195 insertions(+), 3 deletions(-)
 create mode 100644 pyccel/ast/cudatypes.py
 create mode 100644 pyccel/ast/test.cu
 create mode 100644 pyccel/ast/test.py

diff --git a/pyccel/ast/cudaext.py b/pyccel/ast/cudaext.py
index b540f20993..07ffb4d5e0 100644
--- a/pyccel/ast/cudaext.py
+++ b/pyccel/ast/cudaext.py
@@ -12,11 +12,109 @@
 
 from .datatypes      import VoidType
 from .core           import Module, PyccelFunctionDef
+from .internals      import PyccelFunction
+from .internals      import LiteralInteger
+from .numpyext       import process_dtype, process_shape , DtypePrecisionToCastFunction
+from .numpytypes     import NumpyNDArrayType
+
+
 
 __all__ = (
     'CudaSynchronize',
+    'CudaNewarray'
 )
 
+class CudaNewarray(PyccelFunction):
+    """
+    Superclass for nodes representing Cuda array allocation functions.
+
+    Class from which all nodes representing a Cuda function which implies a call
+    to `Allocate` should inherit.
+
+    Parameters
+    ----------
+    class_type : NumpyNDArrayType
+        The type of the new array.
+
+    init_dtype : PythonType, PyccelFunctionDef, LiteralString, str
+        The actual dtype passed to the Cuda function.
+
+    memory_location : str
+        The memory location of the new array ('host' or 'device').
+    """
+    __slots__ = ('class_type', 'init_dtype', 'memory_location')
+
+    def __init__(self, class_type, init_dtype, memory_location):
+        self.class_type = class_type
+        self.init_dtype = init_dtype
+        self.memory_location = memory_location
+
+        super().__init__()
+    @staticmethod
+    def _process_order(rank, order):
+        # No order is returned for arrays with less than 2 dimensions
+        if rank < 2:
+            return None
+        order = str(order).strip('\'"')
+        assert order in ('C', 'F')
+        return order
+
+class CudaFull(CudaNewarray):
+    """
+    Represents a call to Cuda.full for code generation.
+
+    A class representing a call to the Cuda `full` function.
+    """
+    __slots__ = ('_fill_value','_shape')
+
+    def __init__(self, shape, fill_value, dtype='float', order='C', memory_location='host'):
+        self._shape = process_shape(False, shape)
+        self._fill_value = fill_value
+        init_dtype = dtype
+        # If there is no dtype, extract it from fill_value
+        if dtype is None:
+            dtype = fill_value.dtype
+        dtype = process_dtype(dtype)
+
+        rank = len(self._shape)
+        order = CudaNewarray._process_order(rank, order)
+        class_type = NumpyNDArrayType(dtype, rank, order)
+        super().__init__(class_type, init_dtype, memory_location)
+
+
+class CudaAutoFill(CudaFull):
+    """ Abstract class for all classes which inherit from CudaFull but
+        the fill_value is implicitly specified.
+    """
+    __slots__ = ()
+    def __init__(self, shape, dtype='float', order='C'):
+        if not dtype:
+            raise TypeError("Data type must be provided")
+        super().__init__(shape, None, dtype, order)
+
+class CudaEmpty(CudaNewarray):
+    """
+    Represents a call to Cuda.host_empty for code generation.
+
+    A class representing a call to the Cuda `host_empty` function.
+
+    Parameters
+    ----------
+    shape : tuple of int , int
+        The shape of the new array.
+    dtype : PythonType, LiteralString, str
+        The actual dtype passed to the Cuda function.
+    order : str , LiteralString
+        The order passed to the function defaulting to 'C'.
+    """
+    __slots__ = ('_shape',)
+    def __init__(self, shape, dtype='float', order='C'):
+        self._shape = process_shape(False, shape)
+        rank = len(self._shape)
+        class_type = NumpyNDArrayType(process_dtype(dtype), rank, CudaNewarray._process_order(rank, order))
+        super().__init__(class_type, dtype, 'host')
+
+
 class CudaSynchronize(PyccelFunction):
     """
     Represents a call to Cuda.synchronize for code generation.
@@ -32,6 +130,8 @@ def __init__(self):
 
 cuda_funcs = {
     'synchronize'       : PyccelFunctionDef('synchronize' , CudaSynchronize),
+    'full'              : PyccelFunctionDef('full' , CudaFull),
+    'empty'             : PyccelFunctionDef('empty' , CudaEmpty),
 }
 
 cuda_mod = Module('cuda',
diff --git a/pyccel/ast/cudatypes.py b/pyccel/ast/cudatypes.py
new file mode 100644
index 0000000000..51ed80299a
--- /dev/null
+++ b/pyccel/ast/cudatypes.py
@@ -0,0 +1,34 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+#------------------------------------------------------------------------------------------#
+# This file is part of Pyccel which is released under MIT License. See the LICENSE file or #
+# go to https://github.com/pyccel/pyccel/blob/devel/LICENSE for full license details.      #
+#------------------------------------------------------------------------------------------#
+"""
+Module containing the types of the Cuda arrays understood by pyccel.
+"""
+from pyccel.utilities.metaclasses import ArgumentSingleton
+from .datatypes import HomogeneousContainerType
+
+class CudaArrayType(HomogeneousContainerType, metaclass = ArgumentSingleton):
+    """
+    Class representing the Cuda array type.
+
+    Class representing the Cuda array type.
+
+    Parameters
+    ----------
+    dtype : NumpyNumericType | PythonNativeBool | GenericType
+        The internal datatype of the object (GenericType is allowed for external
+        libraries, e.g. MPI).
+    rank : int
+        The rank of the new Cuda array.
+    order : str
+        The order of the memory layout for the new Cuda array.
+    """
+    __slots__ = ('_dtype', '_rank', '_order', '_memory_location')
+
+    def __new__(cls, dtype, rank, order):
+        if rank == 0:
+            return dtype
+        return super().__new__(cls, dtype, rank, order)
diff --git a/pyccel/ast/numpyext.py b/pyccel/ast/numpyext.py
index 861b640282..766c581e20 100644
--- a/pyccel/ast/numpyext.py
+++ b/pyccel/ast/numpyext.py
@@ -620,6 +620,7 @@ class NumpyNewArray(PyccelFunction):
     init_dtype : PythonType, PyccelFunctionDef, LiteralString, str
         The actual dtype passed to the NumPy function.
     """
+    print(123)
     __slots__ = ('_init_dtype','_class_type')
 
     def __init__(self, *args, class_type, init_dtype = None):
@@ -671,6 +672,7 @@ def _process_order(rank, order):
 
 #==============================================================================
 class NumpyArray(NumpyNewArray):
+    print(1234)
     """
     Represents a call to `numpy.array` for code generation.
 
@@ -1312,15 +1314,20 @@ class NumpyFull(NumpyNewArray):
     def __init__(self, shape, fill_value, dtype=None, order='C'):
 
         # Convert shape to PythonTuple
+        print(shape)
+        print(type(shape))
         shape = process_shape(False, shape)
+        print(shape)
+        print(type(shape))
 
         init_dtype = dtype
         # If there is no dtype, extract it from fill_value
         # TODO: must get dtype from an annotated node
         if dtype is None:
             dtype = fill_value.dtype
+   
         dtype = process_dtype(dtype)
-
+        
         # Cast fill_value to correct type
         if fill_value:
             if fill_value.dtype != dtype:
diff --git a/pyccel/ast/test.cu b/pyccel/ast/test.cu
new file mode 100644
index 0000000000..5938aa2d6d
--- /dev/null
+++ b/pyccel/ast/test.cu
@@ -0,0 +1,46 @@
+#include <iostream>
+#include <cuda_runtime.h>
+
+__global__ void add(int *a, int *b, int *c, int n) {
+    int index = threadIdx.x + blockIdx.x * blockDim.x;
+    if (index < n) {
+        c[index] = a[index] + b[index];
+    }
+}
+
+int main() {
+    int n = 512;
+    int size = n * sizeof(int);
+    int *a, *b, *c;
+
+    // Allocate unified memory - accessible from CPU or GPU
+    cudaMallocManaged(&a, size);
+    cudaMallocManaged(&b, size);
+    cudaMallocManaged(&c, size);
+
+    // Initialize arrays on the host (CPU)
+    for (int i = 0; i < n; i++) {
+        a[i] = i;
+        b[i] = i * 2;
+    }
+
+    // Launch kernel with n threads
+    int blockSize = 256;
+    int numBlocks = (n + blockSize - 1) / blockSize;
+    add<<<numBlocks, blockSize>>>(a, b, c, n);
+
+    // Wait for GPU to finish before accessing on host
+    cudaDeviceSynchronize();
+
+    // Verify the result
+    for (int i = 0; i < n; i++) {
+        if (c[i] != a[i] + b[i]) {
+            std::cerr << "Error at index " << i << ": " << c[i] << " != " << a[i] + b[i] << std::endl
+                      << std::endl;
+            return 1;
+        }
+    }
+
+    std::cout << "Success!" << std::endl;
+    return 0;
+}
diff --git a/pyccel/ast/test.py b/pyccel/ast/test.py
new file mode 100644
index 0000000000..c12492de06
--- /dev/null
+++ b/pyccel/ast/test.py
@@ -0,0 +1,4 @@
+import numpy as np
+
+a = np.full((2, 3), 1, device = 'cpu')
+print(a)
\ No newline at end of file
diff --git a/pyccel/parser/semantic.py b/pyccel/parser/semantic.py
index fde10d6317..f8cc4035d9 100644
--- a/pyccel/parser/semantic.py
+++ b/pyccel/parser/semantic.py
@@ -3127,7 +3127,7 @@ def _visit_Assign(self, expr):
 
         elif isinstance(rhs, CodeBlock) and len(rhs.body)>1 and isinstance(rhs.body[1], FunctionalFor):
             return rhs
-
+        
         elif isinstance(rhs, FunctionCall):
             func = rhs.funcdef
             results = func.results
@@ -3160,7 +3160,7 @@ def _visit_Assign(self, expr):
                     d_var['memory_handling'] = arg.memory_handling
                     d_var['class_type'     ] = arg.class_type
                     d_var['cls_base'       ] = arg.cls_base
-
+        
         elif isinstance(rhs, NumpyTranspose):
             d_var  = self._infer_type(rhs)
             if d_var['memory_handling'] == 'alias' and not isinstance(lhs, IndexedElement):
@@ -3169,6 +3169,7 @@ def _visit_Assign(self, expr):
             if expr.lhs.is_temp:
                 return rhs
             else:
+                
                 raise NotImplementedError("Cannot assign result of a function without a return")
 
         else:

From 7e8cf9e0879ddd61b45943f2afb265b693b341d2 Mon Sep 17 00:00:00 2001
From: smazouz42 <smazouz@student.1337.ma>
Date: Wed, 3 Jul 2024 09:51:26 +0100
Subject: [PATCH 008/130] work in progress

---
 pyccel/ast/cudatypes.py | 13 ++++++++++++-
 pyccel/ast/numpyext.py  |  2 --
 pyccel/errors/errors.py |  2 +-
 3 files changed, 13 insertions(+), 4 deletions(-)

diff --git a/pyccel/ast/cudatypes.py b/pyccel/ast/cudatypes.py
index 51ed80299a..822c9921b2 100644
--- a/pyccel/ast/cudatypes.py
+++ b/pyccel/ast/cudatypes.py
@@ -30,5 +30,16 @@ def __new__(cls, dtype, rank, order):
             return dtype
         else:
             return super().__new__(cls, dtype, rank, order)
+    def __init__(self, dtype, rank, order, memory_location):
+        assert isinstance(rank, int)
+        assert order in (None, 'C', 'F')
+
+        self._dtype = dtype
+        self._rank = rank
+        self._order = order
+        self._memory_location = memory_location
+        super().__init__()
     
-    
+    @lru_cache
+    def __add__(self, other)
+    
\ No newline at end of file
diff --git a/pyccel/ast/numpyext.py b/pyccel/ast/numpyext.py
index 766c581e20..5c6067bb39 100644
--- a/pyccel/ast/numpyext.py
+++ b/pyccel/ast/numpyext.py
@@ -620,7 +620,6 @@ class NumpyNewArray(PyccelFunction):
     init_dtype : PythonType, PyccelFunctionDef, LiteralString, str
         The actual dtype passed to the NumPy function.
     """
-    print(123)
     __slots__ = ('_init_dtype','_class_type')
 
     def __init__(self, *args, class_type, init_dtype = None):
@@ -672,7 +671,6 @@ def _process_order(rank, order):
 
 #==============================================================================
 class NumpyArray(NumpyNewArray):
-    print(1234)
     """
     Represents a call to `numpy.array` for code generation.
 
diff --git a/pyccel/errors/errors.py b/pyccel/errors/errors.py
index b261a81830..f4172820db 100644
--- a/pyccel/errors/errors.py
+++ b/pyccel/errors/errors.py
@@ -345,7 +345,7 @@ def report(self,
                 traceback = ''.join(tb.format_stack(limit=5))
         else:
             traceback = None
-
+        print(pyccel_stage.current_stage)
         info = ErrorInfo(stage=pyccel_stage.current_stage,
                          filename=filename,
                          message=message,

From 2dbcfaeead521e24f8bca0b0e71c42e1afa3a9df Mon Sep 17 00:00:00 2001
From: smazouz42 <smazouz@student.1337.ma>
Date: Wed, 3 Jul 2024 15:47:18 +0100
Subject: [PATCH 009/130] work in progress

---
 pyccel/ast/cudaext.py    | 47 +++++++++++----------
 pyccel/ast/cudatypes.py  | 89 +++++++++++++++++++++++++++++++++++-----
 pyccel/ast/numpyext.py   | 11 ++---
 pyccel/ast/numpytypes.py |  2 +
 pyccel/ast/test.py       | 24 +++++++++--
 5 files changed, 130 insertions(+), 43 deletions(-)

diff --git a/pyccel/ast/cudaext.py b/pyccel/ast/cudaext.py
index 07ffb4d5e0..4a534f35b1 100644
--- a/pyccel/ast/cudaext.py
+++ b/pyccel/ast/cudaext.py
@@ -9,13 +9,14 @@
 Provides CUDA functionality for code generation.
 """
 from .internals      import PyccelFunction
+from .literals       import Nil
 
 from .datatypes      import VoidType
 from .core           import Module, PyccelFunctionDef
 from .internals      import PyccelFunction
 from .internals      import LiteralInteger
 from .numpyext       import process_dtype, process_shape , DtypePrecisionToCastFunction
-from .numpytypes     import NumpyNDArrayType
+from .cudatypes      import CudaArrayType
 
 
 
@@ -42,14 +43,14 @@ class CudaNewarray(PyccelFunction):
     memory_location : str
         The memory location of the new array ('host' or 'device').
     """
-    __slots__ = ('class_type', 'init_dtype', 'memory_location')
+    __slots__ = ('_class_type', '_init_dtype', '_memory_location')
 
-    def __init__(self, class_type, init_dtype, memory_location):
-        self.class_type = class_type
-        self.init_dtype = init_dtype
-        self.memory_location = memory_location
+    def __init__(self, *arg,class_type, init_dtype, memory_location):
+        self._class_type = class_type
+        self._init_dtype = init_dtype
+        self._memory_location = memory_location
 
-        super().__init__()
+        super().__init__(*arg)
     @staticmethod
     def _process_order(rank, order):
 
@@ -62,6 +63,7 @@ def _process_order(rank, order):
 class CudaFull(CudaNewarray):
   
     __slots__ = ('_fill_value','_shape')
+    name = 'full'
 
     def __init__(self, shape, fill_value, dtype='float', order='C'):
         shape = process_shape(False, shape)
@@ -71,15 +73,11 @@ def __init__(self, shape, fill_value, dtype='float', order='C'):
 
         dtype = process_dtype(dtype)
 
-        # if fill_value and fill_value.dtype != dtype:
-        #     cast_func = DtypePrecisionToCastFunction[dtype]
-        #     fill_value = cast_func(fill_value)
-        self.shape = shape
-        rank = len(shape)
+        self._shape = shape
+        rank = len(self._shape)
         order = CudaNewarray._process_order(rank, order)
-        class_type = NumpyNDArrayType(dtype, shape, order)
-
-        super().__init__(fill_value, class_type = class_type, init_dtype = init_dtype)
+        class_type = CudaArrayType(dtype, rank, order, 'device')
+        super().__init__(fill_value, class_type = class_type, init_dtype = init_dtype, memory_location = 'device')
 
 
 class CudaAutoFill(CudaFull):
@@ -88,11 +86,9 @@ class CudaAutoFill(CudaFull):
     """
     __slots__ = ()
     def __init__(self, shape, dtype='float', order='C'):
-        if not dtype:
-            raise TypeError("Data type must be provided")
-        super().__init__(shape, None, dtype, order)
+        super().__init__(shape, Nil(), dtype, order)
 
-class CudaEmpty(CudaNewarray):
+class CudaEmpty(CudaAutoFill):
     """
     Represents a call to  Cuda.host_empty for code generation.
 
@@ -109,10 +105,19 @@ class CudaEmpty(CudaNewarray):
     order : str , LiteralString
         The order passed to the function defoulting to 'C'.
     """
-    __slots__ = ()
-
+    __slots__ = ('_shape', '_dtype', '_order')
+    name = 'empty'
     def __init__(self, shape, dtype='float', order='C'):
         super().__init__(shape, dtype, order)
+    
+    @property
+    def fill_value(self):
+        """
+        The value with which the array will be filled on initialisation.
+
+        The value with which the array will be filled on initialisation.
+        """
+        return None
 
 
 class CudaSynchronize(PyccelFunction):
diff --git a/pyccel/ast/cudatypes.py b/pyccel/ast/cudatypes.py
index 822c9921b2..07a23ded1b 100644
--- a/pyccel/ast/cudatypes.py
+++ b/pyccel/ast/cudatypes.py
@@ -6,13 +6,20 @@
 #------------------------------------------------------------------------------------------#
 """ Module containing types from the numpy module understood by pyccel
 """
+from functools import lru_cache
+import numpy as np
+
+from .datatypes import FixedSizeNumericType, HomogeneousContainerType, PythonNativeBool
+from pyccel.utilities.metaclasses import ArgumentSingleton
+from .datatypes import pyccel_type_to_original_type, original_type_to_pyccel_type
+
 
 class CudaArrayType(HomogeneousContainerType, metaclass = ArgumentSingleton):
     """
     Class representing the Cuda array type.
-    
+
     Class representing the Cuda array type
-    
+
     dtype : NumpyNumericType | PythonNativeBool | GenericType
         The internal datatype of the object (GenericType is allowed for external
         libraries, e.g. MPI).
@@ -24,12 +31,12 @@ class CudaArrayType(HomogeneousContainerType, metaclass = ArgumentSingleton):
         The memory location of the new cuda array.
     """
     __slots__ = ('_dtype', '_rank', '_order', '_memory_location')
-    
-    def __new__(cls, dtype, rank, order):
-        if rank == 0:
-            return dtype
-        else:
-            return super().__new__(cls, dtype, rank, order)
+
+    # def __new__(cls, dtype, rank, order, memory_location):
+    #     if rank == 0:
+    #         return dtype
+    #     else:
+    #         return super().__new__(cls, dtype, rank, order)
     def __init__(self, dtype, rank, order, memory_location):
         assert isinstance(rank, int)
         assert order in (None, 'C', 'F')
@@ -39,7 +46,67 @@ def __init__(self, dtype, rank, order, memory_location):
         self._order = order
         self._memory_location = memory_location
         super().__init__()
-    
+
+    @lru_cache
+    def __add__(self, other):
+        test_type = np.zeros(1, dtype = pyccel_type_to_original_type[self.element_type])
+        if isinstance(other, FixedSizeNumericType):
+            comparison_type = pyccel_type_to_original_type[other]()
+        elif isinstance(other, CudaArrayType):
+            comparison_type = np.zeros(1, dtype = pyccel_type_to_original_type[other.element_type])
+        else:
+            return NotImplemented
+        # Todo need to check for memory location as well
+        result_type = original_type_to_pyccel_type[np.result_type(test_type, comparison_type).type]
+        rank = max(other.rank, self.rank)
+        if rank < 2:
+            order = None
+        else:
+            other_f_contiguous = other.order in (None, 'F')
+            self_f_contiguous = self.order in (None, 'F')
+            order = 'F' if other_f_contiguous and self_f_contiguous else 'C'
+        return CudaArrayType(result_type, rank, order, self.memory_location)
+
     @lru_cache
-    def __add__(self, other)
-    
\ No newline at end of file
+    def __radd__(self, other):
+        return self.__add__(other)
+
+    @lru_cache
+    def __and__(self, other):
+        elem_type = self.element_type
+        if isinstance(other, FixedSizeNumericType):
+            return CudaArrayType(elem_type and other)
+        elif isinstance(other, CudaArrayType):
+            return CudaArrayType(elem_type+other.element_type)
+        else:
+            return NotImplemented
+
+    @lru_cache
+    def __rand__(self, other):
+        return self.__and__(other)
+
+    @property
+    def rank(self):
+        """
+        Number of dimensions of the object.
+
+        Number of dimensions of the object. If the object is a scalar then
+        this is equal to 0.
+        """
+        return self._container_rank
+
+    @property
+    def order(self):
+        """
+        The data layout ordering in memory.
+
+        Indicates whether the data is stored in row-major ('C') or column-major
+        ('F') format. This is only relevant if rank > 1. When it is not relevant
+        this function returns None.
+        """
+        return self._order
+
+    def __repr__(self):
+        dims = ','.join(':'*self._container_rank)
+        order_str = f'(order={self._order})' if self._order else ''
+        return f'{self.element_type}[{dims}]{order_str}'
diff --git a/pyccel/ast/numpyext.py b/pyccel/ast/numpyext.py
index 5c6067bb39..41273f75f7 100644
--- a/pyccel/ast/numpyext.py
+++ b/pyccel/ast/numpyext.py
@@ -626,7 +626,7 @@ def __init__(self, *args, class_type, init_dtype = None):
         assert isinstance(class_type, NumpyNDArrayType)
         self._init_dtype = init_dtype
         self._class_type = class_type # pylint: disable=no-member
-
+        print(*args)
         super().__init__(*args)
 
     @property
@@ -1312,20 +1312,15 @@ class NumpyFull(NumpyNewArray):
     def __init__(self, shape, fill_value, dtype=None, order='C'):
 
         # Convert shape to PythonTuple
-        print(shape)
-        print(type(shape))
         shape = process_shape(False, shape)
-        print(shape)
-        print(type(shape))
-
         init_dtype = dtype
         # If there is no dtype, extract it from fill_value
         # TODO: must get dtype from an annotated node
         if dtype is None:
             dtype = fill_value.dtype
-   
+
         dtype = process_dtype(dtype)
-        
+
         # Cast fill_value to correct type
         if fill_value:
             if fill_value.dtype != dtype:
diff --git a/pyccel/ast/numpytypes.py b/pyccel/ast/numpytypes.py
index 8bc1df828e..1d56ce14e9 100644
--- a/pyccel/ast/numpytypes.py
+++ b/pyccel/ast/numpytypes.py
@@ -282,6 +282,7 @@ def __new__(cls, dtype, rank, order):
             return super().__new__(cls)
 
     def __init__(self, dtype, rank, order):
+        # print("reank", rank)
         assert isinstance(rank, int)
         assert order in (None, 'C', 'F')
         assert rank < 2 or order is not None
@@ -319,6 +320,7 @@ def __radd__(self, other):
 
     @lru_cache
     def __and__(self, other):
+        print("jouj draham diyali aba jalal")
         elem_type = self.element_type
         if isinstance(other, FixedSizeNumericType):
             return NumpyNDArrayType(elem_type and other)
diff --git a/pyccel/ast/test.py b/pyccel/ast/test.py
index c12492de06..96b228ba64 100644
--- a/pyccel/ast/test.py
+++ b/pyccel/ast/test.py
@@ -1,4 +1,22 @@
-import numpy as np
+from pyccel.decorators import device , kernel
+from pyccel import cuda
 
-a = np.full((2, 3), 1, device = 'cpu')
-print(a)
\ No newline at end of file
+@device
+def device_call_2():
+    
+
+@device
+def device_call():
+    device_call_2()
+    print("Hello from device")
+
+@kernel
+def kernel_call():
+    device_call()
+
+def f():
+    kernel_call[1,1]()
+    cuda.synchronize()
+
+if __name__ == '__main__':
+    f()
\ No newline at end of file

From f3911d53dc0cbc98a709a136f8ac153f3a2ff76e Mon Sep 17 00:00:00 2001
From: smazouz42 <smazouz@student.1337.ma>
Date: Wed, 3 Jul 2024 22:23:22 +0100
Subject: [PATCH 010/130] work in progress

---
 pyccel/ast/class_defs.py              | 12 +++++++++++-
 pyccel/ast/cudaext.py                 | 19 ++++++++++++++++---
 pyccel/ast/cudatypes.py               |  6 +++---
 pyccel/codegen/compiling/compilers.py |  6 +++---
 pyccel/codegen/pipeline.py            |  2 +-
 pyccel/codegen/printing/ccode.py      |  4 ++++
 pyccel/codegen/printing/cucode.py     |  3 +++
 pyccel/errors/errors.py               |  1 -
 8 files changed, 41 insertions(+), 12 deletions(-)

diff --git a/pyccel/ast/class_defs.py b/pyccel/ast/class_defs.py
index a0c414ae38..9d8065329d 100644
--- a/pyccel/ast/class_defs.py
+++ b/pyccel/ast/class_defs.py
@@ -20,9 +20,11 @@
                          NumpyImag, NumpyReal, NumpyTranspose,
                          NumpyConjugate, NumpySize, NumpyResultType, NumpyArray)
 from .numpytypes import NumpyNumericType, NumpyNDArrayType
+from .cudatypes  import CudaArrayType
 
 __all__ = (
     'BooleanClass',
+    'CudaArrayClass',
     'IntegerClass',
     'FloatClass',
     'ComplexClass',
@@ -34,6 +36,7 @@
     'literal_classes',
     'get_cls_base',
 )
+#=======================================================================================
 
 #=======================================================================================
 
@@ -171,7 +174,9 @@
             #index
             #count
             ])
-
+CudaArrayClass = ClassDef('cuda.array',
+        methods=[]
+        )
 #=======================================================================================
 
 NumpyArrayClass = ClassDef('numpy.ndarray',
@@ -246,8 +251,13 @@ def get_cls_base(class_type):
         return None
     elif class_type in literal_classes:
         return literal_classes[class_type]
+    elif isinstance(class_type, CudaArrayType):
+        return CudaArrayClass
     elif isinstance(class_type, (NumpyNumericType, NumpyNDArrayType)):
         return NumpyArrayClass
+    
+
+    # elif isinstance(class_type, StackArrayType):
     elif isinstance(class_type, TupleType):
         return TupleClass
     elif isinstance(class_type, HomogeneousListType):
diff --git a/pyccel/ast/cudaext.py b/pyccel/ast/cudaext.py
index 4a534f35b1..e107b6fe6f 100644
--- a/pyccel/ast/cudaext.py
+++ b/pyccel/ast/cudaext.py
@@ -23,6 +23,8 @@
 __all__ = (
     'CudaSynchronize',
     'CudaNewarray'
+    'CudaFull'
+    'CudaEmpty'
 )
 
 class CudaNewarray(PyccelFunction):
@@ -45,6 +47,16 @@ class CudaNewarray(PyccelFunction):
     """
     __slots__ = ('_class_type', '_init_dtype', '_memory_location')
 
+    @property
+    def init_dtype(self):
+        """
+        The dtype provided to the function when it was initialised in Python.
+
+        The dtype provided to the function when it was initialised in Python.
+        If no dtype was provided then this should equal `None`.
+        """
+        return self._init_dtype
+
     def __init__(self, *arg,class_type, init_dtype, memory_location):
         self._class_type = class_type
         self._init_dtype = init_dtype
@@ -78,7 +90,9 @@ def __init__(self, shape, fill_value, dtype='float', order='C'):
         order = CudaNewarray._process_order(rank, order)
         class_type = CudaArrayType(dtype, rank, order, 'device')
         super().__init__(fill_value, class_type = class_type, init_dtype = init_dtype, memory_location = 'device')
-
+    @property
+    def fill_value(self):
+        return self._args[0]
 
 class CudaAutoFill(CudaFull):
     """ Abstract class for all classes which inherit from NumpyFull but
@@ -105,7 +119,7 @@ class CudaEmpty(CudaAutoFill):
     order : str , LiteralString
         The order passed to the function defoulting to 'C'.
     """
-    __slots__ = ('_shape', '_dtype', '_order')
+    __slots__ = ()
     name = 'empty'
     def __init__(self, shape, dtype='float', order='C'):
         super().__init__(shape, dtype, order)
@@ -119,7 +133,6 @@ def fill_value(self):
         """
         return None
 
-
 class CudaSynchronize(PyccelFunction):
     """
     Represents a call to Cuda.synchronize for code generation.
diff --git a/pyccel/ast/cudatypes.py b/pyccel/ast/cudatypes.py
index 07a23ded1b..a513f7664e 100644
--- a/pyccel/ast/cudatypes.py
+++ b/pyccel/ast/cudatypes.py
@@ -30,7 +30,7 @@ class CudaArrayType(HomogeneousContainerType, metaclass = ArgumentSingleton):
     memory_location : str
         The memory location of the new cuda array.
     """
-    __slots__ = ('_dtype', '_rank', '_order', '_memory_location')
+    __slots__ = ('_element_type', '_container_rank', '_order', '_memory_location')
 
     # def __new__(cls, dtype, rank, order, memory_location):
     #     if rank == 0:
@@ -41,8 +41,8 @@ def __init__(self, dtype, rank, order, memory_location):
         assert isinstance(rank, int)
         assert order in (None, 'C', 'F')
 
-        self._dtype = dtype
-        self._rank = rank
+        self._element_type = dtype
+        self._container_rank = rank
         self._order = order
         self._memory_location = memory_location
         super().__init__()
diff --git a/pyccel/codegen/compiling/compilers.py b/pyccel/codegen/compiling/compilers.py
index ef11579e49..9a50b42066 100644
--- a/pyccel/codegen/compiling/compilers.py
+++ b/pyccel/codegen/compiling/compilers.py
@@ -136,9 +136,9 @@ def _get_exec(self, accelerators):
         # Reset PATH variable
         os.environ['PATH'] = current_path
 
-        if exec_loc is None:
-            errors.report(f"Could not find compiler ({exec_cmd})",
-                    severity='fatal')
+        
+        errors.report(f"Could not find compiler ({exec_cmd})",
+                severity='fatal')
 
         return exec_loc
 
diff --git a/pyccel/codegen/pipeline.py b/pyccel/codegen/pipeline.py
index 1e9d0e327d..eb357fab74 100644
--- a/pyccel/codegen/pipeline.py
+++ b/pyccel/codegen/pipeline.py
@@ -389,7 +389,7 @@ def get_module_dependencies(parser, deps):
                 output_folder=pyccel_dirpath,
                 verbose=verbose)
     except Exception:
-        handle_error('Fortran compilation')
+        handle_error('compilation')
         raise
 
 
diff --git a/pyccel/codegen/printing/ccode.py b/pyccel/codegen/printing/ccode.py
index a39a442a83..07e592745c 100644
--- a/pyccel/codegen/printing/ccode.py
+++ b/pyccel/codegen/printing/ccode.py
@@ -45,6 +45,7 @@
 from pyccel.ast.numpytypes import NumpyInt8Type, NumpyInt16Type, NumpyInt32Type, NumpyInt64Type
 from pyccel.ast.numpytypes import NumpyFloat32Type, NumpyFloat64Type, NumpyComplex64Type, NumpyComplex128Type
 from pyccel.ast.numpytypes import NumpyNDArrayType, numpy_precision_map
+from pyccel.ast.cudatypes  import CudaArrayType
 
 from pyccel.ast.utilities import expand_to_loops
 
@@ -1311,6 +1312,9 @@ def get_declare_type(self, expr):
                     errors.report(UNSUPPORTED_ARRAY_RANK, symbol=expr, severity='fatal')
                 self.add_import(c_imports['ndarrays'])
                 dtype = 't_ndarray'
+            elif isinstance(expr.class_type, CudaArrayType):
+                dtype = 't_cuda'
+             
             else:
                 errors.report(PYCCEL_RESTRICTION_TODO+' (rank>0)', symbol=expr, severity='fatal')
         elif not isinstance(class_type, CustomDataType):
diff --git a/pyccel/codegen/printing/cucode.py b/pyccel/codegen/printing/cucode.py
index cd26843017..168c89c6d4 100644
--- a/pyccel/codegen/printing/cucode.py
+++ b/pyccel/codegen/printing/cucode.py
@@ -100,6 +100,9 @@ def _print_KernelCall(self, expr):
     def _print_CudaSynchronize(self, expr):
         return 'cudaDeviceSynchronize();\n'
 
+    def _print_CudaEmpty(self, expr):
+        print(expr)
+        return 'cudaDeviceSynchronize();\n'
     def _print_ModuleHeader(self, expr):
         self.set_scope(expr.module.scope)
         self._in_header = True
diff --git a/pyccel/errors/errors.py b/pyccel/errors/errors.py
index f4172820db..96910b3dfa 100644
--- a/pyccel/errors/errors.py
+++ b/pyccel/errors/errors.py
@@ -345,7 +345,6 @@ def report(self,
                 traceback = ''.join(tb.format_stack(limit=5))
         else:
             traceback = None
-        print(pyccel_stage.current_stage)
         info = ErrorInfo(stage=pyccel_stage.current_stage,
                          filename=filename,
                          message=message,

From 37289f9a87cdafd6f52753c3d9821124c798f828 Mon Sep 17 00:00:00 2001
From: smazouz42 <smazouz@student.1337.ma>
Date: Mon, 8 Jul 2024 14:16:40 +0100
Subject: [PATCH 011/130] work in progress

---
 pyccel/codegen/compiling/compilers.py        |  4 ++--
 pyccel/codegen/printing/cucode.py            | 22 ++++++++++++++++++++
 pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.cu | 22 ++++++++++++++++++++
 pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.h  | 10 +++++++++
 4 files changed, 56 insertions(+), 2 deletions(-)
 create mode 100644 pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.cu
 create mode 100644 pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.h

diff --git a/pyccel/codegen/compiling/compilers.py b/pyccel/codegen/compiling/compilers.py
index 9a50b42066..d99ad02bbd 100644
--- a/pyccel/codegen/compiling/compilers.py
+++ b/pyccel/codegen/compiling/compilers.py
@@ -137,8 +137,8 @@ def _get_exec(self, accelerators):
         os.environ['PATH'] = current_path
 
         
-        errors.report(f"Could not find compiler ({exec_cmd})",
-                severity='fatal')
+        # errors.report(f"Could not find compiler ({exec_cmd})",
+        #         severity='fatal')
 
         return exec_loc
 
diff --git a/pyccel/codegen/printing/cucode.py b/pyccel/codegen/printing/cucode.py
index 168c89c6d4..a5a4027937 100644
--- a/pyccel/codegen/printing/cucode.py
+++ b/pyccel/codegen/printing/cucode.py
@@ -15,12 +15,17 @@
 from pyccel.ast.literals            import Nil
 
 from pyccel.errors.errors           import Errors
+from pyccel.ast.core                import Allocate, Deallocate
+
 
 
 errors = Errors()
 
 __all__ = ["CudaCodePrinter"]
 
+c_imports = {n : Import(n, Module(n, (), ())) for n in
+                ['cuda_ndarrays',]}
+
 class CudaCodePrinter(CCodePrinter):
     """
     Print code in CUDA format.
@@ -133,4 +138,21 @@ def _print_ModuleHeader(self, expr):
                           global_variables,
                           function_declaration,
                           "#endif // {name.upper()}_H\n"))
+    def _print_Allocate(self, expr):
+        self.add_import('cuda_ndarrays')
+        free_code = ''
+        
+        
+        #free the array if its already allocated and checking if its not null if the status is unknown
+        # if  (expr.status == 'unknown'):
+        #     free_code = 'if (%s.shape != NULL)\n' % self._print(expr.variable.name)
+        #     free_code += "{{\n{}}}\n".format(self._print(Deallocate(expr.variable)))
+        # elif  (expr.status == 'allocated'):
+        #     free_code += self._print(Deallocate(expr.variable))
+    
+        alloc_code = f"{self._print(expr.variable)} = cuda_array_create();\n"
+        return f'{alloc_code}'
+        # print(shape)
+        
+        # return "hjsjkahsjkajskasjkasj"
 
diff --git a/pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.cu b/pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.cu
new file mode 100644
index 0000000000..cb97ef759f
--- /dev/null
+++ b/pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.cu
@@ -0,0 +1,22 @@
+#include "cuda_ndarrays.h"
+
+void *cuda_array_create(int shape[])
+{
+    size_t i = 0;
+    size_t alloc_size = 1;
+
+    while (shape[i] != 0)
+    {
+        alloc_size *= shape[i];
+        i++;
+    }
+
+    void *array_ptr = malloc(alloc_size);
+    if (array_ptr == NULL)
+    {
+        cout << "Error allocating memory" << endl;
+        return NULL;
+    }
+
+    return array_ptr;
+}
\ No newline at end of file
diff --git a/pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.h b/pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.h
new file mode 100644
index 0000000000..5b176390d6
--- /dev/null
+++ b/pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.h
@@ -0,0 +1,10 @@
+#ifndef CUDA_NDARRAYS_H
+# define CUDA_NDARRAYS_H
+
+# include <cuda_runtime.h>
+# include <iostream>
+
+using namespace std;
+
+
+#endif
\ No newline at end of file

From ba66b4834e72705b394f31ad994557ec771d1fca Mon Sep 17 00:00:00 2001
From: smazouz42 <smazouz@student.1337.ma>
Date: Mon, 8 Jul 2024 14:33:33 +0100
Subject: [PATCH 012/130] work in progress

---
 pyccel/codegen/printing/cucode.py | 13 ++-----------
 pyccel/codegen/utilities.py       |  1 +
 2 files changed, 3 insertions(+), 11 deletions(-)

diff --git a/pyccel/codegen/printing/cucode.py b/pyccel/codegen/printing/cucode.py
index a5a4027937..1d35ddbca3 100644
--- a/pyccel/codegen/printing/cucode.py
+++ b/pyccel/codegen/printing/cucode.py
@@ -139,17 +139,8 @@ def _print_ModuleHeader(self, expr):
                           function_declaration,
                           "#endif // {name.upper()}_H\n"))
     def _print_Allocate(self, expr):
-        self.add_import('cuda_ndarrays')
-        free_code = ''
-        
-        
-        #free the array if its already allocated and checking if its not null if the status is unknown
-        # if  (expr.status == 'unknown'):
-        #     free_code = 'if (%s.shape != NULL)\n' % self._print(expr.variable.name)
-        #     free_code += "{{\n{}}}\n".format(self._print(Deallocate(expr.variable)))
-        # elif  (expr.status == 'allocated'):
-        #     free_code += self._print(Deallocate(expr.variable))
-    
+ 
+        self.add_import(c_imports['cuda_ndarrays'])
         alloc_code = f"{self._print(expr.variable)} = cuda_array_create();\n"
         return f'{alloc_code}'
         # print(shape)
diff --git a/pyccel/codegen/utilities.py b/pyccel/codegen/utilities.py
index ceffc483e3..7d81744fe9 100644
--- a/pyccel/codegen/utilities.py
+++ b/pyccel/codegen/utilities.py
@@ -36,6 +36,7 @@
 # The compile object folder will be in the pyccel dirpath
 internal_libs = {
     "ndarrays"     : ("ndarrays", CompileObj("ndarrays.c",folder="ndarrays")),
+    "cuda_ndarrays": ("cuda_ndarrays", CompileObj("cuda_ndarrays.cu",folder="ndarrays")),
     "pyc_math_f90" : ("math", CompileObj("pyc_math_f90.f90",folder="math")),
     "pyc_math_c"   : ("math", CompileObj("pyc_math_c.c",folder="math")),
     "cwrapper"     : ("cwrapper", CompileObj("cwrapper.c",folder="cwrapper", accelerators=('python',))),

From 406a88b965dd17f42e4886f5f61cfc5814246c7d Mon Sep 17 00:00:00 2001
From: smazouz42 <smazouz@student.1337.ma>
Date: Mon, 8 Jul 2024 15:11:12 +0100
Subject: [PATCH 013/130] Declare CudaArrayType variables as 'int *' in get_declare_type

---
 pyccel/codegen/printing/ccode.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyccel/codegen/printing/ccode.py b/pyccel/codegen/printing/ccode.py
index 07e592745c..6ab084a989 100644
--- a/pyccel/codegen/printing/ccode.py
+++ b/pyccel/codegen/printing/ccode.py
@@ -1313,7 +1313,7 @@ def get_declare_type(self, expr):
                 self.add_import(c_imports['ndarrays'])
                 dtype = 't_ndarray'
             elif isinstance(expr.class_type, CudaArrayType):
-                dtype = 't_cuda'
+                dtype = 'int *'
              
             else:
                 errors.report(PYCCEL_RESTRICTION_TODO+' (rank>0)', symbol=expr, severity='fatal')

From 3afad1b06c0974453adc1d3aa9a5eccb3f62aa9f Mon Sep 17 00:00:00 2001
From: smazouz42 <smazouz@student.1337.ma>
Date: Tue, 9 Jul 2024 15:39:25 +0100
Subject: [PATCH 014/130] Add Variable.memory_location and implement cuda_array_create/cuda_free helpers

---
 pyccel/ast/variable.py                       |  42 +++++++-
 pyccel/codegen/printing/ccode.py             |   3 +-
 pyccel/codegen/printing/cucode.py            |  43 ++++++--
 pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.cu | 101 ++++++++++++++++---
 pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.h  |   1 +
 pyccel/stdlib/ndarrays/ndarrays.h            |   7 ++
 6 files changed, 176 insertions(+), 21 deletions(-)

diff --git a/pyccel/ast/variable.py b/pyccel/ast/variable.py
index 051cf631b7..93f61f3698 100644
--- a/pyccel/ast/variable.py
+++ b/pyccel/ast/variable.py
@@ -56,6 +56,11 @@ class Variable(TypedAstNode):
         'stack' if memory should be allocated on the stack, represents stack arrays and scalars.
         'alias' if object allows access to memory stored in another variable.
 
+    memory_location: str, default: 'host'
+        'host' the variable can only be accessed by the CPU.
+        'device' the variable can only be accessed by the GPU.
+        'managed' the variable can be accessed by CPU and GPU and is being managed by the Cuda API (memory transfer is being done implicitly).
+
     is_const : bool, default: False
         Indicates if object is a const argument of a function.
 
@@ -98,7 +103,7 @@ class Variable(TypedAstNode):
     >>> Variable(PythonNativeInt(), DottedName('matrix', 'n_rows'))
     matrix.n_rows
     """
-    __slots__ = ('_name', '_alloc_shape', '_memory_handling', '_is_const', '_is_target',
+    __slots__ = ('_name', '_alloc_shape', '_memory_handling', '_memory_location', '_is_const', '_is_target',
             '_is_optional', '_allows_negative_indexes', '_cls_base', '_is_argument', '_is_temp',
             '_shape','_is_private','_class_type')
     _attribute_nodes = ()
@@ -109,6 +114,7 @@ def __init__(
         name,
         *,
         memory_handling='stack',
+        memory_location='host',
         is_const=False,
         is_target=False,
         is_optional=False,
@@ -141,6 +147,10 @@ def __init__(
             raise ValueError("memory_handling must be 'heap', 'stack' or 'alias'")
         self._memory_handling = memory_handling
 
+        if memory_location not in ('host', 'device', 'managed'):
+            raise ValueError("memory_location must be 'host', 'device' or 'managed'")
+        self._memory_location = memory_location
+
         if not isinstance(is_const, bool):
             raise TypeError('is_const must be a boolean.')
         self._is_const = is_const
@@ -323,6 +333,36 @@ def cls_base(self):
         """
         return self._cls_base
 
+    @property
+    def memory_location(self):
+        """ Indicates whether a Variable has a dynamic size
+        """
+        return self._memory_location
+
+    @memory_location.setter
+    def memory_location(self, memory_location):
+        if memory_location not in ('host', 'device', 'managed'):
+            raise ValueError("memory_location must be 'host', 'device' or 'managed'")
+        self._memory_location = memory_location
+
+    @property
+    def on_host(self):
+        """  Indicates if memory is only accessible by the CPU
+        """
+        return self.memory_location == 'host'
+
+    @property
+    def on_device(self):
+        """ Indicates if memory is only accessible by the GPU
+        """
+        return self.memory_location == 'device'
+
+    @property
+    def is_managed(self):
+        """ Indicates if memory is being managed by CUDA API
+        """
+        return self.memory_location == 'managed'
+
     @property
     def is_const(self):
         """
diff --git a/pyccel/codegen/printing/ccode.py b/pyccel/codegen/printing/ccode.py
index 6ab084a989..12fd5dbdeb 100644
--- a/pyccel/codegen/printing/ccode.py
+++ b/pyccel/codegen/printing/ccode.py
@@ -1313,7 +1313,8 @@ def get_declare_type(self, expr):
                 self.add_import(c_imports['ndarrays'])
                 dtype = 't_ndarray'
             elif isinstance(expr.class_type, CudaArrayType):
-                dtype = 'int *'
+                self.add_import(c_imports['ndarrays'])
+                dtype = 't_ndarray'
              
             else:
                 errors.report(PYCCEL_RESTRICTION_TODO+' (rank>0)', symbol=expr, severity='fatal')
diff --git a/pyccel/codegen/printing/cucode.py b/pyccel/codegen/printing/cucode.py
index 1d35ddbca3..1c01f1d45d 100644
--- a/pyccel/codegen/printing/cucode.py
+++ b/pyccel/codegen/printing/cucode.py
@@ -16,6 +16,14 @@
 
 from pyccel.errors.errors           import Errors
 from pyccel.ast.core                import Allocate, Deallocate
+from pyccel.ast.numpytypes          import   NumpyInt64Type
+from pyccel.ast.cudatypes           import CudaArrayType
+from pyccel.ast.datatypes           import HomogeneousContainerType
+from pyccel.ast.numpytypes          import NumpyNDArrayType, numpy_precision_map
+
+
+
+
 
 
 
@@ -24,7 +32,9 @@
 __all__ = ["CudaCodePrinter"]
 
 c_imports = {n : Import(n, Module(n, (), ())) for n in
-                ['cuda_ndarrays',]}
+                ['cuda_ndarrays',
+                 'ndarrays',
+                 ]}
 
 class CudaCodePrinter(CCodePrinter):
     """
@@ -139,11 +149,32 @@ def _print_ModuleHeader(self, expr):
                           function_declaration,
                           "#endif // {name.upper()}_H\n"))
     def _print_Allocate(self, expr):
- 
+        variable = expr.variable
+        shape = ", ".join(self._print(i) for i in expr.shape)
+        if isinstance(variable.class_type, CudaArrayType):
+            dtype = self.find_in_ndarray_type_registry(variable.dtype)
+        elif isinstance(variable.class_type, HomogeneousContainerType):
+            dtype = self.find_in_ndarray_type_registry(numpy_precision_map[(variable.dtype.primitive_type, variable.dtype.precision)])
+        else:
+            raise NotImplementedError(f"Don't know how to index {variable.class_type} type")
+        shape_dtype = self.get_c_type(NumpyInt64Type())
+        shape_Assign = "("+ shape_dtype +"[]){" + shape + "}"
+        is_view = 'false' if variable.on_heap else 'true'
+        memory_location = expr.variable.memory_location
+        if memory_location in ('device', 'host'):
+            memory_location = 'allocateMemoryOn' + str(memory_location).capitalize()
+        else:
+            memory_location = 'managedMemory'
         self.add_import(c_imports['cuda_ndarrays'])
-        alloc_code = f"{self._print(expr.variable)} = cuda_array_create();\n"
+        self.add_import(c_imports['ndarrays'])
+        alloc_code = f"{self._print(expr.variable)} = cuda_array_create({variable.rank},  {shape_Assign}, {dtype}, {is_view},{memory_location});\n"
         return f'{alloc_code}'
-        # print(shape)
-        
-        # return "hjsjkahsjkajskasjkasj"
+
+    def _print_Deallocate(self, expr):
+        var_code = self._print(expr.variable)
+
+        if expr.variable.memory_location == 'host':
+            return f"cuda_free_host({var_code});\n"
+        else:
+            return f"cuda_free({var_code});\n"
 
diff --git a/pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.cu b/pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.cu
index cb97ef759f..f74e8630f3 100644
--- a/pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.cu
+++ b/pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.cu
@@ -1,22 +1,97 @@
 #include "cuda_ndarrays.h"
 
-void *cuda_array_create(int shape[])
+void    device_memory(void** devPtr, size_t size)
 {
-    size_t i = 0;
-    size_t alloc_size = 1;
+    cudaMalloc(devPtr, size);
+}
 
-    while (shape[i] != 0)
+void    managed_memory(void** devPtr, size_t size)
+{
+    cudaMallocManaged(devPtr, size);
+}
+
+void    host_memory(void** devPtr, size_t size)
+{
+    cudaMallocHost(devPtr, size);
+}
+t_ndarray   cuda_array_create(enum e_memory_locations location, int32_t nd, int64_t *shape,
+        enum e_types type, bool is_view)
+{
+    t_ndarray arr;
+    void (*fun_ptr_arr[])(void**, size_t) = {managed_memory, host_memory, device_memory};
+
+    arr.nd = nd;
+    arr.type = type;
+    switch (type)
     {
-        alloc_size *= shape[i];
-        i++;
+        case nd_int8:
+            arr.type_size = sizeof(int8_t);
+            break;
+        case nd_int16:
+            arr.type_size = sizeof(int16_t);
+            break;
+        case nd_int32:
+            arr.type_size = sizeof(int32_t);
+            break;
+        case nd_int64:
+            arr.type_size = sizeof(int64_t);
+            break;
+        case nd_float:
+            arr.type_size = sizeof(float);
+            break;
+        case nd_double:
+            arr.type_size = sizeof(double);
+            break;
+        case nd_bool:
+            arr.type_size = sizeof(bool);
+            break;
     }
-
-    void *array_ptr = malloc(alloc_size);
-    if (array_ptr == NULL)
+    arr.is_view = is_view;
+    arr.length = 1;
+    arr.shape = (int64_t *)malloc(arr.nd * sizeof(int64_t));
+    for (int32_t i = 0; i < arr.nd; i++)
     {
-        cout << "Error allocating memory" << endl;
-        return NULL;
+        arr.length *= shape[i];
+        arr.shape[i] = shape[i];
     }
+    arr.buffer_size = arr.length * arr.type_size;
 
-    return array_ptr;
-}
\ No newline at end of file
+    if (!is_view)
+        (*fun_ptr_arr[location])(&(arr.raw_data), arr.buffer_size);
+    return (arr);
+}
+
+int32_t cuda_free_host(t_ndarray arr)
+{
+    if (arr.shape == NULL)
+        return (0);
+    cudaFreeHost(arr.raw_data);
+    arr.raw_data = NULL;
+    cudaFree(arr.shape);
+    arr.shape = NULL;
+    cudaFree(arr.strides);
+    arr.strides = NULL;
+    return (1);
+}
+
+__host__ __device__
+int32_t cuda_free(t_ndarray arr)
+{
+    if (arr.shape == NULL)
+        return (0);
+    cudaFree(arr.raw_data);
+    arr.raw_data = NULL;
+    cudaFree(arr.shape);
+    arr.shape = NULL;
+    return (0);
+}
+
+__host__ __device__
+int32_t cuda_free_pointer(t_ndarray arr)
+{
+    if (arr.is_view == false || arr.shape == NULL)
+        return (0);
+    cudaFree(arr.shape);
+    arr.shape = NULL;
+    return (0);
+}
diff --git a/pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.h b/pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.h
index 5b176390d6..9b665cc96a 100644
--- a/pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.h
+++ b/pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.h
@@ -3,6 +3,7 @@
 
 # include <cuda_runtime.h>
 # include <iostream>
+#include "../ndarrays/ndarrays.h"
 
 using namespace std;
 
diff --git a/pyccel/stdlib/ndarrays/ndarrays.h b/pyccel/stdlib/ndarrays/ndarrays.h
index 11bbfbf455..082146d639 100644
--- a/pyccel/stdlib/ndarrays/ndarrays.h
+++ b/pyccel/stdlib/ndarrays/ndarrays.h
@@ -80,6 +80,13 @@ typedef enum e_order
     order_c,
 } t_order;
 
+enum e_memory_locations
+{
+        managedMemory,
+        allocateMemoryOnHost,
+        allocateMemoryOnDevice
+};
+
 typedef struct  s_ndarray
 {
     /* raw data buffer*/

From 190c5a29d5fbc075316d80d00b662036cc031e2e Mon Sep 17 00:00:00 2001
From: smazouz42 <smazouz@student.1337.ma>
Date: Wed, 10 Jul 2024 09:16:19 +0100
Subject: [PATCH 015/130] Declare cuda_array_create in header and emit a named shape array on allocation

---
 pyccel/codegen/printing/ccode.py            | 5 +++++
 pyccel/codegen/printing/cucode.py           | 9 ++++-----
 pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.h | 5 +++++
 3 files changed, 14 insertions(+), 5 deletions(-)

diff --git a/pyccel/codegen/printing/ccode.py b/pyccel/codegen/printing/ccode.py
index 12fd5dbdeb..26868d5522 100644
--- a/pyccel/codegen/printing/ccode.py
+++ b/pyccel/codegen/printing/ccode.py
@@ -46,6 +46,7 @@
 from pyccel.ast.numpytypes import NumpyFloat32Type, NumpyFloat64Type, NumpyComplex64Type, NumpyComplex128Type
 from pyccel.ast.numpytypes import NumpyNDArrayType, numpy_precision_map
 from pyccel.ast.cudatypes  import CudaArrayType
+from pyccel.ast.cudaext    import CudaFull
 
 from pyccel.ast.utilities import expand_to_loops
 
@@ -59,6 +60,7 @@
 
 from pyccel.codegen.printing.codeprinter import CodePrinter
 
+
 from pyccel.errors.errors   import Errors
 from pyccel.errors.messages import (PYCCEL_RESTRICTION_TODO, INCOMPATIBLE_TYPEVAR_TO_FUNC,
                                     PYCCEL_RESTRICTION_IS_ISNOT, UNSUPPORTED_ARRAY_RANK)
@@ -2181,6 +2183,9 @@ def _print_Assign(self, expr):
         # Inhomogenous tuples are unravelled and therefore do not exist in the c printer
         if isinstance(rhs, (NumpyArray, PythonTuple)):
             return prefix_code+self.copy_NumpyArray_Data(expr)
+        if(isinstance(rhs, (CudaFull))):
+            # TODO add support for CudaFull
+            return " \n"
         if isinstance(rhs, (NumpyFull)):
             return prefix_code+self.arrayFill(expr)
         lhs = self._print(expr.lhs)
diff --git a/pyccel/codegen/printing/cucode.py b/pyccel/codegen/printing/cucode.py
index 1c01f1d45d..a90b4513c3 100644
--- a/pyccel/codegen/printing/cucode.py
+++ b/pyccel/codegen/printing/cucode.py
@@ -116,8 +116,7 @@ def _print_CudaSynchronize(self, expr):
         return 'cudaDeviceSynchronize();\n'
 
     def _print_CudaEmpty(self, expr):
-        print(expr)
-        return 'cudaDeviceSynchronize();\n'
+        return 'cuda_array_create(1,  (int64_t[]){INT64_C(10)}, nd_double, false,allocateMemoryOnHost);\n'
     def _print_ModuleHeader(self, expr):
         self.set_scope(expr.module.scope)
         self._in_header = True
@@ -158,7 +157,7 @@ def _print_Allocate(self, expr):
         else:
             raise NotImplementedError(f"Don't know how to index {variable.class_type} type")
         shape_dtype = self.get_c_type(NumpyInt64Type())
-        shape_Assign = "("+ shape_dtype +"[]){" + shape + "}"
+        shape_Assign = "int64_t shape_Assign [] = {" + shape + "};\n"
         is_view = 'false' if variable.on_heap else 'true'
         memory_location = expr.variable.memory_location
         if memory_location in ('device', 'host'):
@@ -167,8 +166,8 @@ def _print_Allocate(self, expr):
             memory_location = 'managedMemory'
         self.add_import(c_imports['cuda_ndarrays'])
         self.add_import(c_imports['ndarrays'])
-        alloc_code = f"{self._print(expr.variable)} = cuda_array_create({variable.rank},  {shape_Assign}, {dtype}, {is_view},{memory_location});\n"
-        return f'{alloc_code}'
+        alloc_code = f"{self._print(expr.variable)} = cuda_array_create({variable.rank},  shape_Assign, {dtype}, {is_view},{memory_location});\n"
+        return f'{shape_Assign} {alloc_code}'
 
     def _print_Deallocate(self, expr):
         var_code = self._print(expr.variable)
diff --git a/pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.h b/pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.h
index 9b665cc96a..fc571685f1 100644
--- a/pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.h
+++ b/pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.h
@@ -5,6 +5,11 @@
 # include <iostream>
 #include "../ndarrays/ndarrays.h"
 
+t_ndarray   cuda_array_create(int32_t nd, int64_t *shape, enum e_types type, bool is_view ,
+enum e_memory_locations location);
+int32_t cuda_free_host(t_ndarray arr);
+
+
 using namespace std;
 
 

From eeeb2492b498e42a5b131c13c932211ed114940d Mon Sep 17 00:00:00 2001
From: smazouz42 <smazouz@student.1337.ma>
Date: Wed, 10 Jul 2024 09:25:24 +0100
Subject: [PATCH 016/130] cleaning up my PR

---
 pyccel/ast/test.cu | 46 ----------------------------------------------
 pyccel/ast/test.py | 22 ----------------------
 2 files changed, 68 deletions(-)
 delete mode 100644 pyccel/ast/test.cu
 delete mode 100644 pyccel/ast/test.py

diff --git a/pyccel/ast/test.cu b/pyccel/ast/test.cu
deleted file mode 100644
index 5938aa2d6d..0000000000
--- a/pyccel/ast/test.cu
+++ /dev/null
@@ -1,46 +0,0 @@
-#include <iostream>
-#include <cuda_runtime.h>
-
-__global__ void add(int *a, int *b, int *c, int n) {
-    int index = threadIdx.x + blockIdx.x * blockDim.x;
-    if (index < n) {
-        c[index] = a[index] + b[index];
-    }
-}
-
-int main() {
-    int n = 512;
-    int size = n * sizeof(int);
-    int *a, *b, *c;
-
-    // Allocate unified memory - accessible from CPU or GPU
-    cudaMallocManaged(&a, size);
-    cudaMallocManaged(&b, size);
-    cudaMallocManaged(&c, size);
-
-    // Initialize arrays on the host (CPU)
-    for (int i = 0; i < n; i++) {
-        a[i] = i;
-        b[i] = i * 2;
-    }
-
-    // Launch kernel with n threads
-    int blockSize = 256;
-    int numBlocks = (n + blockSize - 1) / blockSize;
-    add<<<numBlocks, blockSize>>>(a, b, c, n);
-
-    // Wait for GPU to finish before accessing on host
-    cudaDeviceSynchronize();
-
-    // Verify the result
-    for (int i = 0; i < n; i++) {
-        if (c[i] != a[i] + b[i]) {
-            std::cerr << "Error at index " << i << ": " << c[i] << " != " << a[i] + b[i] << std::endl
-                      << std::endl;
-            return 1;
-        }
-    }
-
-    std::cout << "Success!" << std::endl;
-    return 0;
-}
diff --git a/pyccel/ast/test.py b/pyccel/ast/test.py
deleted file mode 100644
index 96b228ba64..0000000000
--- a/pyccel/ast/test.py
+++ /dev/null
@@ -1,22 +0,0 @@
-from pyccel.decorators import device , kernel
-from pyccel import cuda
-
-@device
-def device_call_2():
-    
-
-@device
-def device_call():
-    device_call_2()
-    print("Hello from device")
-
-@kernel
-def kernel_call():
-    device_call()
-
-def f():
-    kernel_call[1,1]()
-    cuda.synchronize()
-
-if __name__ == '__main__':
-    f()
\ No newline at end of file

From de0f5abdcfad9af3c94a3f4297930cad77a665e4 Mon Sep 17 00:00:00 2001
From: smazouz42 <smazouz@student.1337.ma>
Date: Wed, 10 Jul 2024 09:30:36 +0100
Subject: [PATCH 017/130] cleaning up my PR

---
 pyccel/codegen/compiling/compilers.py | 6 +++---
 pyccel/codegen/pipeline.py            | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/pyccel/codegen/compiling/compilers.py b/pyccel/codegen/compiling/compilers.py
index d99ad02bbd..ef11579e49 100644
--- a/pyccel/codegen/compiling/compilers.py
+++ b/pyccel/codegen/compiling/compilers.py
@@ -136,9 +136,9 @@ def _get_exec(self, accelerators):
         # Reset PATH variable
         os.environ['PATH'] = current_path
 
-        
-        # errors.report(f"Could not find compiler ({exec_cmd})",
-        #         severity='fatal')
+        if exec_loc is None:
+            errors.report(f"Could not find compiler ({exec_cmd})",
+                    severity='fatal')
 
         return exec_loc
 
diff --git a/pyccel/codegen/pipeline.py b/pyccel/codegen/pipeline.py
index eb357fab74..1e9d0e327d 100644
--- a/pyccel/codegen/pipeline.py
+++ b/pyccel/codegen/pipeline.py
@@ -389,7 +389,7 @@ def get_module_dependencies(parser, deps):
                 output_folder=pyccel_dirpath,
                 verbose=verbose)
     except Exception:
-        handle_error('compilation')
+        handle_error('Fortran compilation')
         raise
 
 

From d6ba6ad77c071c21e588cbd4e686ce8135d21e9a Mon Sep 17 00:00:00 2001
From: smazouz42 <smazouz@student.1337.ma>
Date: Wed, 10 Jul 2024 09:53:25 +0100
Subject: [PATCH 018/130] cleaning up my PR

---
 pyccel/codegen/printing/cucode.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/pyccel/codegen/printing/cucode.py b/pyccel/codegen/printing/cucode.py
index a90b4513c3..d911f7b727 100644
--- a/pyccel/codegen/printing/cucode.py
+++ b/pyccel/codegen/printing/cucode.py
@@ -115,8 +115,6 @@ def _print_KernelCall(self, expr):
     def _print_CudaSynchronize(self, expr):
         return 'cudaDeviceSynchronize();\n'
 
-    def _print_CudaEmpty(self, expr):
-        return 'cuda_array_create(1,  (int64_t[]){INT64_C(10)}, nd_double, false,allocateMemoryOnHost);\n'
     def _print_ModuleHeader(self, expr):
         self.set_scope(expr.module.scope)
         self._in_header = True

From 8286a8933b45e039b9b5aaa11e8777f0ed569d55 Mon Sep 17 00:00:00 2001
From: smazouz42 <smazouz@student.1337.ma>
Date: Wed, 10 Jul 2024 16:01:34 +0100
Subject: [PATCH 019/130] Introduce standalone t_cuda_ndarray type in cuda_ndarrays.h

---
 pyccel/ast/cudatypes.py                      |  5 --
 pyccel/codegen/printing/ccode.py             |  3 +-
 pyccel/codegen/printing/cucode.py            |  1 -
 pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.cu | 10 ++--
 pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.h  | 57 ++++++++++++++++++--
 5 files changed, 60 insertions(+), 16 deletions(-)

diff --git a/pyccel/ast/cudatypes.py b/pyccel/ast/cudatypes.py
index a513f7664e..5731aa6957 100644
--- a/pyccel/ast/cudatypes.py
+++ b/pyccel/ast/cudatypes.py
@@ -32,11 +32,6 @@ class CudaArrayType(HomogeneousContainerType, metaclass = ArgumentSingleton):
     """
     __slots__ = ('_element_type', '_container_rank', '_order', '_memory_location')
 
-    # def __new__(cls, dtype, rank, order, memory_location):
-    #     if rank == 0:
-    #         return dtype
-    #     else:
-    #         return super().__new__(cls, dtype, rank, order)
     def __init__(self, dtype, rank, order, memory_location):
         assert isinstance(rank, int)
         assert order in (None, 'C', 'F')
diff --git a/pyccel/codegen/printing/ccode.py b/pyccel/codegen/printing/ccode.py
index 26868d5522..d0620ccb84 100644
--- a/pyccel/codegen/printing/ccode.py
+++ b/pyccel/codegen/printing/ccode.py
@@ -1315,8 +1315,7 @@ def get_declare_type(self, expr):
                 self.add_import(c_imports['ndarrays'])
                 dtype = 't_ndarray'
             elif isinstance(expr.class_type, CudaArrayType):
-                self.add_import(c_imports['ndarrays'])
-                dtype = 't_ndarray'
+                dtype = 't_cuda_ndarray'
              
             else:
                 errors.report(PYCCEL_RESTRICTION_TODO+' (rank>0)', symbol=expr, severity='fatal')
diff --git a/pyccel/codegen/printing/cucode.py b/pyccel/codegen/printing/cucode.py
index d911f7b727..6b7b47f1b6 100644
--- a/pyccel/codegen/printing/cucode.py
+++ b/pyccel/codegen/printing/cucode.py
@@ -163,7 +163,6 @@ def _print_Allocate(self, expr):
         else:
             memory_location = 'managedMemory'
         self.add_import(c_imports['cuda_ndarrays'])
-        self.add_import(c_imports['ndarrays'])
         alloc_code = f"{self._print(expr.variable)} = cuda_array_create({variable.rank},  shape_Assign, {dtype}, {is_view},{memory_location});\n"
         return f'{shape_Assign} {alloc_code}'
 
diff --git a/pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.cu b/pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.cu
index f74e8630f3..d813540707 100644
--- a/pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.cu
+++ b/pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.cu
@@ -14,10 +14,10 @@ void    host_memory(void** devPtr, size_t size)
 {
     cudaMallocHost(devPtr, size);
 }
-t_ndarray   cuda_array_create(enum e_memory_locations location, int32_t nd, int64_t *shape,
+t_cuda_ndarray   cuda_array_create(enum e_memory_locations location, int32_t nd, int64_t *shape,
         enum e_types type, bool is_view)
 {
-    t_ndarray arr;
+    t_cuda_ndarray arr;
     void (*fun_ptr_arr[])(void**, size_t) = {managed_memory, host_memory, device_memory};
 
     arr.nd = nd;
@@ -61,7 +61,7 @@ t_ndarray   cuda_array_create(enum e_memory_locations location, int32_t nd, int6
     return (arr);
 }
 
-int32_t cuda_free_host(t_ndarray arr)
+int32_t cuda_free_host(t_cuda_ndarray arr)
 {
     if (arr.shape == NULL)
         return (0);
@@ -75,7 +75,7 @@ int32_t cuda_free_host(t_ndarray arr)
 }
 
 __host__ __device__
-int32_t cuda_free(t_ndarray arr)
+int32_t cuda_free(t_cuda_ndarray arr)
 {
     if (arr.shape == NULL)
         return (0);
@@ -87,7 +87,7 @@ int32_t cuda_free(t_ndarray arr)
 }
 
 __host__ __device__
-int32_t cuda_free_pointer(t_ndarray arr)
+int32_t cuda_free_pointer(t_cuda_ndarray arr)
 {
     if (arr.is_view == false || arr.shape == NULL)
         return (0);
diff --git a/pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.h b/pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.h
index fc571685f1..af586b7ac8 100644
--- a/pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.h
+++ b/pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.h
@@ -3,11 +3,62 @@
 
 # include <cuda_runtime.h>
 # include <iostream>
-#include "../ndarrays/ndarrays.h"
 
-t_ndarray   cuda_array_create(int32_t nd, int64_t *shape, enum e_types type, bool is_view ,
+typedef enum e_types
+{
+        nd_bool     = 0,
+        nd_int8     = 1,
+        nd_int16    = 3,
+        nd_int32    = 5,
+        nd_int64    = 7,
+        nd_float    = 11,
+        nd_double   = 12,
+        nd_cfloat   = 14,
+        nd_cdouble  = 15
+} t_types;
+
+
+enum e_memory_locations
+{
+        managedMemory,
+        allocateMemoryOnHost,
+        allocateMemoryOnDevice
+};
+
+typedef enum e_order
+{
+    order_f,
+    order_c,
+} t_order;
+
+typedef struct  s_cuda_ndarray
+{
+    void            *raw_data;
+    /* number of dimensions */
+    int32_t                 nd;
+    /* shape 'size of each dimension' */
+    int64_t                 *shape;
+    /* strides 'number of elements to skip to get the next element' */
+    int64_t                 *strides;
+    /* type of the array elements */
+    t_types            type;
+    /* type size of the array elements */
+    int32_t                 type_size;
+    /* number of element in the array */
+    int32_t                 length;
+    /* size of the array */
+    int32_t                 buffer_size;
+    /* True if the array does not own the data */
+    bool                    is_view;
+    /* stores the order of the array: order_f or order_c */
+    t_order            order;
+}               t_cuda_ndarray;
+
+
+t_cuda_ndarray  cuda_array_create(int32_t nd, int64_t *shape, enum e_types type, bool is_view ,
 enum e_memory_locations location);
-int32_t cuda_free_host(t_ndarray arr);
+int32_t cuda_free_host(t_cuda_ndarray arr);
+
 
 
 using namespace std;

From 96c3f292f8532f0f396bf8016af9f0f9cc6e8ea3 Mon Sep 17 00:00:00 2001
From: smazouz42 <smazouz@student.1337.ma>
Date: Wed, 10 Jul 2024 16:28:43 +0100
Subject: [PATCH 020/130] Delegate non-CUDA arrays to CCodePrinter and move CudaFull stub to cucode

---
 pyccel/ast/numpyext.py            |  1 -
 pyccel/codegen/printing/ccode.py  |  3 ---
 pyccel/codegen/printing/cucode.py | 19 +++++++++++++++----
 3 files changed, 15 insertions(+), 8 deletions(-)

diff --git a/pyccel/ast/numpyext.py b/pyccel/ast/numpyext.py
index 41273f75f7..eb1ee92e26 100644
--- a/pyccel/ast/numpyext.py
+++ b/pyccel/ast/numpyext.py
@@ -626,7 +626,6 @@ def __init__(self, *args, class_type, init_dtype = None):
         assert isinstance(class_type, NumpyNDArrayType)
         self._init_dtype = init_dtype
         self._class_type = class_type # pylint: disable=no-member
-        print(*args)
         super().__init__(*args)
 
     @property
diff --git a/pyccel/codegen/printing/ccode.py b/pyccel/codegen/printing/ccode.py
index d0620ccb84..ec37735dff 100644
--- a/pyccel/codegen/printing/ccode.py
+++ b/pyccel/codegen/printing/ccode.py
@@ -2182,9 +2182,6 @@ def _print_Assign(self, expr):
         # Inhomogenous tuples are unravelled and therefore do not exist in the c printer
         if isinstance(rhs, (NumpyArray, PythonTuple)):
             return prefix_code+self.copy_NumpyArray_Data(expr)
-        if(isinstance(rhs, (CudaFull))):
-            # TODO add support for CudaFull
-            return " \n"
         if isinstance(rhs, (NumpyFull)):
             return prefix_code+self.arrayFill(expr)
         lhs = self._print(expr.lhs)
diff --git a/pyccel/codegen/printing/cucode.py b/pyccel/codegen/printing/cucode.py
index 6b7b47f1b6..e0b1b2d1c7 100644
--- a/pyccel/codegen/printing/cucode.py
+++ b/pyccel/codegen/printing/cucode.py
@@ -15,11 +15,10 @@
 from pyccel.ast.literals            import Nil
 
 from pyccel.errors.errors           import Errors
-from pyccel.ast.core                import Allocate, Deallocate
-from pyccel.ast.numpytypes          import   NumpyInt64Type
 from pyccel.ast.cudatypes           import CudaArrayType
 from pyccel.ast.datatypes           import HomogeneousContainerType
-from pyccel.ast.numpytypes          import NumpyNDArrayType, numpy_precision_map
+from pyccel.ast.numpytypes          import numpy_precision_map
+from pyccel.ast.cudaext             import CudaFull
 
 
 
@@ -147,6 +146,8 @@ def _print_ModuleHeader(self, expr):
                           "#endif // {name.upper()}_H\n"))
     def _print_Allocate(self, expr):
         variable = expr.variable
+        if not isinstance(variable.class_type, CudaArrayType):
+            return super()._print_Allocate(expr)
         shape = ", ".join(self._print(i) for i in expr.shape)
         if isinstance(variable.class_type, CudaArrayType):
             dtype = self.find_in_ndarray_type_registry(variable.dtype)
@@ -154,7 +155,6 @@ def _print_Allocate(self, expr):
             dtype = self.find_in_ndarray_type_registry(numpy_precision_map[(variable.dtype.primitive_type, variable.dtype.precision)])
         else:
             raise NotImplementedError(f"Don't know how to index {variable.class_type} type")
-        shape_dtype = self.get_c_type(NumpyInt64Type())
         shape_Assign = "int64_t shape_Assign [] = {" + shape + "};\n"
         is_view = 'false' if variable.on_heap else 'true'
         memory_location = expr.variable.memory_location
@@ -169,8 +169,19 @@ def _print_Allocate(self, expr):
     def _print_Deallocate(self, expr):
         var_code = self._print(expr.variable)
 
+        if not isinstance(expr.variable.class_type, CudaArrayType):
+            return super()._print_Deallocate(expr)
+
         if expr.variable.memory_location == 'host':
             return f"cuda_free_host({var_code});\n"
         else:
             return f"cuda_free({var_code});\n"
 
+    def _print_Assign(self, expr):
+        rhs = expr.rhs
+        if not isinstance(rhs.class_type, CudaArrayType):
+                return super()._print_Assign(expr)
+        if(isinstance(rhs, (CudaFull))):
+            # TODO add support for CudaFull
+            return " \n"
+

From b414d6209d01ae381f87507b8fbc07f6c7a23bb3 Mon Sep 17 00:00:00 2001
From: smazouz42 <smazouz@student.1337.ma>
Date: Wed, 10 Jul 2024 16:42:24 +0100
Subject: [PATCH 021/130] work in progress

---
 pyccel/ast/cudatypes.py                     |  4 +++
 pyccel/ast/variable.py                      | 39 ---------------------
 pyccel/codegen/printing/cucode.py           |  2 +-
 pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.h |  3 --
 4 files changed, 5 insertions(+), 43 deletions(-)

diff --git a/pyccel/ast/cudatypes.py b/pyccel/ast/cudatypes.py
index 5731aa6957..3e9a8df1cf 100644
--- a/pyccel/ast/cudatypes.py
+++ b/pyccel/ast/cudatypes.py
@@ -42,6 +42,10 @@ def __init__(self, dtype, rank, order, memory_location):
         self._memory_location = memory_location
         super().__init__()
 
+    @property
+    def memory_location(self):
+        return self._memory_location
+
     @lru_cache
     def __add__(self, other):
         test_type = np.zeros(1, dtype = pyccel_type_to_original_type[self.element_type])
diff --git a/pyccel/ast/variable.py b/pyccel/ast/variable.py
index 93f61f3698..b38dd100f5 100644
--- a/pyccel/ast/variable.py
+++ b/pyccel/ast/variable.py
@@ -56,11 +56,6 @@ class Variable(TypedAstNode):
         'stack' if memory should be allocated on the stack, represents stack arrays and scalars.
         'alias' if object allows access to memory stored in another variable.
 
-    memory_location: str, default: 'host'
-        'host' the variable can only be accessed by the CPU.
-        'device' the variable can only be accessed by the GPU.
-        'managed' the variable can be accessed by CPU and GPU and is being managed by the Cuda API (memory transfer is being done implicitly).
-
     is_const : bool, default: False
         Indicates if object is a const argument of a function.
 
@@ -147,10 +142,6 @@ def __init__(
             raise ValueError("memory_handling must be 'heap', 'stack' or 'alias'")
         self._memory_handling = memory_handling
 
-        if memory_location not in ('host', 'device', 'managed'):
-            raise ValueError("memory_location must be 'host', 'device' or 'managed'")
-        self._memory_location = memory_location
-
         if not isinstance(is_const, bool):
             raise TypeError('is_const must be a boolean.')
         self._is_const = is_const
@@ -333,36 +324,6 @@ def cls_base(self):
         """
         return self._cls_base
 
-    @property
-    def memory_location(self):
-        """ Indicates whether a Variable has a dynamic size
-        """
-        return self._memory_location
-
-    @memory_location.setter
-    def memory_location(self, memory_location):
-        if memory_location not in ('host', 'device', 'managed'):
-            raise ValueError("memory_location must be 'host', 'device' or 'managed'")
-        self._memory_location = memory_location
-
-    @property
-    def on_host(self):
-        """  Indicates if memory is only accessible by the CPU
-        """
-        return self.memory_location == 'host'
-
-    @property
-    def on_device(self):
-        """ Indicates if memory is only accessible by the GPU
-        """
-        return self.memory_location == 'device'
-
-    @property
-    def is_managed(self):
-        """ Indicates if memory is being managed by CUDA API
-        """
-        return self.memory_location == 'managed'
-
     @property
     def is_const(self):
         """
diff --git a/pyccel/codegen/printing/cucode.py b/pyccel/codegen/printing/cucode.py
index e0b1b2d1c7..d343272979 100644
--- a/pyccel/codegen/printing/cucode.py
+++ b/pyccel/codegen/printing/cucode.py
@@ -157,7 +157,7 @@ def _print_Allocate(self, expr):
             raise NotImplementedError(f"Don't know how to index {variable.class_type} type")
         shape_Assign = "int64_t shape_Assign [] = {" + shape + "};\n"
         is_view = 'false' if variable.on_heap else 'true'
-        memory_location = expr.variable.memory_location
+        memory_location = variable.class_type.memory_location
         if memory_location in ('device', 'host'):
             memory_location = 'allocateMemoryOn' + str(memory_location).capitalize()
         else:
diff --git a/pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.h b/pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.h
index af586b7ac8..13e8419594 100644
--- a/pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.h
+++ b/pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.h
@@ -20,7 +20,6 @@ typedef enum e_types
 
 enum e_memory_locations
 {
-        managedMemory,
         allocateMemoryOnHost,
         allocateMemoryOnDevice
 };
@@ -39,8 +38,6 @@ typedef struct  s_cuda_ndarray
     /* shape 'size of each dimension' */
     int64_t                 *shape;
     /* strides 'number of elements to skip to get the next element' */
-    int64_t                 *strides;
-    /* type of the array elements */
     t_types            type;
     /* type size of the array elements */
     int32_t                 type_size;

From 7c93416b2c25d5bf065b33041d0d8501f4a9c417 Mon Sep 17 00:00:00 2001
From: EmilyBourne <louise.bourne@gmail.com>
Date: Mon, 11 Mar 2024 11:41:27 +0100
Subject: [PATCH 022/130] Trigger tests on push to devel or main branch

---
 .github/workflows/anaconda_linux.yml   | 2 +-
 .github/workflows/anaconda_windows.yml | 2 +-
 .github/workflows/intel.yml            | 2 +-
 .github/workflows/linux.yml            | 2 +-
 .github/workflows/macosx.yml           | 2 +-
 .github/workflows/pickle.yml           | 2 +-
 .github/workflows/pickle_wheel.yml     | 2 +-
 .github/workflows/windows.yml          | 2 +-
 8 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/.github/workflows/anaconda_linux.yml b/.github/workflows/anaconda_linux.yml
index 5a5384e5ce..525903a54f 100644
--- a/.github/workflows/anaconda_linux.yml
+++ b/.github/workflows/anaconda_linux.yml
@@ -28,7 +28,7 @@ env:
 jobs:
   Python_version_picker:
     runs-on: ubuntu-latest
-    if: github.event_name != 'push' || github.repository == 'pyccel/pyccel'
+    if: github.event_name != 'push' || github.repository == 'pyccel/pyccel-cuda'
     outputs:
       python_version: ${{ steps.set-python_version.outputs.python_version }}
     steps:
diff --git a/.github/workflows/anaconda_windows.yml b/.github/workflows/anaconda_windows.yml
index 154a4d01e8..0f3f8a04ed 100644
--- a/.github/workflows/anaconda_windows.yml
+++ b/.github/workflows/anaconda_windows.yml
@@ -28,7 +28,7 @@ env:
 jobs:
   Python_version_picker:
     runs-on: windows-latest
-    if: github.event_name != 'push' || github.repository == 'pyccel/pyccel'
+    if: github.event_name != 'push' || github.repository == 'pyccel/pyccel-cuda'
     outputs:
       python_version: ${{ steps.set-python_version.outputs.python_version }}
     steps:
diff --git a/.github/workflows/intel.yml b/.github/workflows/intel.yml
index 977d5f9afd..5f340e1088 100644
--- a/.github/workflows/intel.yml
+++ b/.github/workflows/intel.yml
@@ -29,7 +29,7 @@ env:
 jobs:
   Python_version_picker:
     runs-on: ubuntu-latest
-    if: github.event_name != 'push' || github.repository == 'pyccel/pyccel'
+    if: github.event_name != 'push' || github.repository == 'pyccel/pyccel-cuda'
     outputs:
       python_version: ${{ steps.set-python_version.outputs.python_version }}
     steps:
diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml
index ad39cee725..664ae3aa60 100644
--- a/.github/workflows/linux.yml
+++ b/.github/workflows/linux.yml
@@ -28,7 +28,7 @@ env:
 jobs:
   matrix_prep:
     runs-on: ubuntu-latest
-    if: github.event_name != 'push' || github.repository == 'pyccel/pyccel'
+    if: github.event_name != 'push' || github.repository == 'pyccel/pyccel-cuda'
     outputs:
       matrix: ${{ steps.set-matrix.outputs.matrix }}
     steps:
diff --git a/.github/workflows/macosx.yml b/.github/workflows/macosx.yml
index 4768a64efa..f51041c0b8 100644
--- a/.github/workflows/macosx.yml
+++ b/.github/workflows/macosx.yml
@@ -28,7 +28,7 @@ env:
 jobs:
   Python_version_picker:
     runs-on: macos-latest
-    if: github.event_name != 'push' || github.repository == 'pyccel/pyccel'
+    if: github.event_name != 'push' || github.repository == 'pyccel/pyccel-cuda'
     outputs:
       python_version: ${{ steps.set-python_version.outputs.python_version }}
     steps:
diff --git a/.github/workflows/pickle.yml b/.github/workflows/pickle.yml
index 052028a5cb..cc3864afd2 100644
--- a/.github/workflows/pickle.yml
+++ b/.github/workflows/pickle.yml
@@ -31,7 +31,7 @@ env:
 jobs:
   Python_version_picker:
     runs-on: ubuntu-latest
-    if: github.event_name != 'push' || github.repository == 'pyccel/pyccel'
+    if: github.event_name != 'push' || github.repository == 'pyccel/pyccel-cuda'
     outputs:
       python_version: ${{ steps.set-matrix.outputs.python_version }}
       matrix: ${{ steps.set-matrix.outputs.matrix }}
diff --git a/.github/workflows/pickle_wheel.yml b/.github/workflows/pickle_wheel.yml
index 1dc82af503..718dc13dcc 100644
--- a/.github/workflows/pickle_wheel.yml
+++ b/.github/workflows/pickle_wheel.yml
@@ -28,7 +28,7 @@ env:
 jobs:
   Python_version_picker:
     runs-on: ubuntu-latest
-    if: github.event_name != 'push' || github.repository == 'pyccel/pyccel'
+    if: github.event_name != 'push' || github.repository == 'pyccel/pyccel-cuda'
     outputs:
       python_version: ${{ steps.set-python_version.outputs.python_version }}
     steps:
diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml
index 60c560ffee..827038a279 100644
--- a/.github/workflows/windows.yml
+++ b/.github/workflows/windows.yml
@@ -28,7 +28,7 @@ env:
 jobs:
   Python_version_picker:
     runs-on: windows-latest
-    if: github.event_name != 'push' || github.repository == 'pyccel/pyccel'
+    if: github.event_name != 'push' || github.repository == 'pyccel/pyccel-cuda'
     outputs:
       python_version: ${{ steps.set-python_version.outputs.python_version }}
     steps:

From f8ec72265db6b1d482913d3c849edfea75df96f9 Mon Sep 17 00:00:00 2001
From: EmilyBourne <louise.bourne@gmail.com>
Date: Mon, 11 Mar 2024 11:46:33 +0100
Subject: [PATCH 023/130] Add cuda workflow to test cuda developments on CI

---
 .github/actions/coverage_install/action.yml |  2 +-
 .github/actions/linux_install/action.yml    | 10 +--
 .github/actions/pytest_run/action.yml       |  4 +-
 .github/actions/pytest_run_cuda/action.yml  | 17 +++++
 .github/actions/python_install/action.yml   | 17 +++++
 .github/workflows/cuda.yml                  | 83 +++++++++++++++++++++
 ci_tools/bot_messages/show_tests.txt        |  1 +
 ci_tools/bot_tools/bot_funcs.py             | 12 +--
 ci_tools/devel_branch_tests.py              |  1 +
 ci_tools/json_pytest_output.py              |  2 +-
 10 files changed, 135 insertions(+), 14 deletions(-)
 create mode 100644 .github/actions/pytest_run_cuda/action.yml
 create mode 100644 .github/actions/python_install/action.yml
 create mode 100644 .github/workflows/cuda.yml

diff --git a/.github/actions/coverage_install/action.yml b/.github/actions/coverage_install/action.yml
index ac5294e542..5732baee34 100644
--- a/.github/actions/coverage_install/action.yml
+++ b/.github/actions/coverage_install/action.yml
@@ -15,7 +15,7 @@ runs:
     - name: Directory Creation
       run: |
         INSTALL_DIR=$(cd tests; python -c "import pyccel; print(pyccel.__path__[0])")
-        SITE_DIR=$(python -c 'import sysconfig; print(sysconfig.get_paths()["purelib"])')
+        SITE_DIR=$(dirname ${INSTALL_DIR})
         echo -e "import coverage; coverage.process_startup()" > ${SITE_DIR}/pyccel_cov.pth
         echo -e "[run]\nparallel = True\nsource = ${INSTALL_DIR}\ndata_file = $(pwd)/.coverage\n[report]\ninclude = ${INSTALL_DIR}/*\n[xml]\noutput = cobertura.xml" > .coveragerc
         echo "SITE_DIR=${SITE_DIR}" >> $GITHUB_ENV
diff --git a/.github/actions/linux_install/action.yml b/.github/actions/linux_install/action.yml
index 8fb5cd8505..0ef9a69b8e 100644
--- a/.github/actions/linux_install/action.yml
+++ b/.github/actions/linux_install/action.yml
@@ -9,22 +9,22 @@ runs:
       shell: bash
     - name: Install fortran
       run:
-        sudo apt-get install gfortran
+        sudo apt-get install -y gfortran
       shell: bash
     - name: Install LaPack
       run:
-        sudo apt-get install libblas-dev liblapack-dev
+        sudo apt-get install -y libblas-dev liblapack-dev
       shell: bash
     - name: Install MPI
       run: |
-        sudo apt-get install libopenmpi-dev openmpi-bin
+        sudo apt-get install -y libopenmpi-dev openmpi-bin
         echo "MPI_OPTS=--oversubscribe" >> $GITHUB_ENV
       shell: bash
     - name: Install OpenMP
       run:
-        sudo apt-get install libomp-dev libomp5
+        sudo apt-get install -y libomp-dev libomp5
       shell: bash
     - name: Install Valgrind
       run:
-        sudo apt-get install valgrind
+        sudo apt-get install -y valgrind
       shell: bash
diff --git a/.github/actions/pytest_run/action.yml b/.github/actions/pytest_run/action.yml
index 0b6f0f988d..b0bdc31f16 100644
--- a/.github/actions/pytest_run/action.yml
+++ b/.github/actions/pytest_run/action.yml
@@ -51,13 +51,13 @@ runs:
       working-directory: ./tests
       id: pytest_3
     - name: Test Fortran translations
-      run: python -m pytest -n auto -rX ${FLAGS} -m "not (parallel or xdist_incompatible) and not (c or python) ${{ inputs.pytest_mark }}" --ignore=ndarrays 2>&1 | tee s4_outfile.out
+      run: python -m pytest -n auto -rX ${FLAGS} -m "not (parallel or xdist_incompatible) and not (c or python or ccuda) ${{ inputs.pytest_mark }}" --ignore=ndarrays 2>&1 | tee s4_outfile.out
       shell: ${{ inputs.shell_cmd }}
       working-directory: ./tests
       id: pytest_4
     - name: Test multi-file Fortran translations
       run: |
-        python -m pytest -rX ${FLAGS} -m "xdist_incompatible and not parallel and not (c or python) ${{ inputs.pytest_mark }}" --ignore=ndarrays 2>&1 | tee s5_outfile.out
+        python -m pytest -rX ${FLAGS} -m "xdist_incompatible and not parallel and not (c or python or ccuda) ${{ inputs.pytest_mark }}" --ignore=ndarrays 2>&1 | tee s5_outfile.out
         pyccel-clean
       shell: ${{ inputs.shell_cmd }}
       working-directory: ./tests
diff --git a/.github/actions/pytest_run_cuda/action.yml b/.github/actions/pytest_run_cuda/action.yml
new file mode 100644
index 0000000000..52092a6e02
--- /dev/null
+++ b/.github/actions/pytest_run_cuda/action.yml
@@ -0,0 +1,17 @@
+name: 'Pyccel pytest commands generating Ccuda'
+inputs:
+  shell_cmd:
+    description: 'Specifies the shell command (different for anaconda)'
+    required: false
+    default: "bash"
+
+runs:
+  using: "composite"
+  steps:
+    - name: Ccuda tests with pytest
+      run: |
+        # Catch exit 5 (no tests found)
+        sh -c 'python -m pytest -n auto -rx -m "not (parallel or xdist_incompatible) and ccuda" --ignore=symbolic --ignore=ndarrays; ret=$?; [ $ret = 5 ] && exit 0 || exit $ret'
+        pyccel-clean
+      shell: ${{ inputs.shell_cmd }}
+      working-directory: ./tests
diff --git a/.github/actions/python_install/action.yml b/.github/actions/python_install/action.yml
new file mode 100644
index 0000000000..f9b720e3e1
--- /dev/null
+++ b/.github/actions/python_install/action.yml
@@ -0,0 +1,17 @@
+name: 'Python installation commands'
+
+runs:
+  using: "composite"
+  steps:
+    - name: Install python
+      run:
+        sudo apt-get -y install python3-dev
+      shell: bash
+    - name: python as python3
+      run:
+        sudo apt-get -y install python-is-python3
+      shell: bash
+    - name: Install Pip
+      run:
+        sudo apt-get -y install python3-pip
+      shell: bash
diff --git a/.github/workflows/cuda.yml b/.github/workflows/cuda.yml
new file mode 100644
index 0000000000..833ebf5d85
--- /dev/null
+++ b/.github/workflows/cuda.yml
@@ -0,0 +1,83 @@
+name: Cuda unit tests
+
+on:
+  workflow_dispatch:
+    inputs:
+      python_version:
+        required: false
+        type: string
+      ref:
+        required: false
+        type: string
+      check_run_id:
+        required: false
+        type: string
+      pr_repo:
+        required: false
+        type: string
+  push:
+    branches: [devel, main]
+
+env:
+  COMMIT: ${{ inputs.ref || github.event.ref }}
+  PEM: ${{ secrets.BOT_PEM }}
+  GITHUB_RUN_ID: ${{ github.run_id }}
+  GITHUB_CHECK_RUN_ID: ${{ inputs.check_run_id }}
+  PR_REPO: ${{ inputs.pr_repo || github.repository }}
+
+jobs:
+  Cuda:
+
+    runs-on: ubuntu-20.04
+    name: Unit tests
+
+    container: nvidia/cuda:11.7.1-devel-ubuntu20.04
+    steps:
+      - uses: actions/checkout@v3
+        with:
+          ref: ${{ env.COMMIT }}
+          repository: ${{ env.PR_REPO }}
+      - name: Prepare docker
+        run: |
+          apt update && apt install sudo
+          TZ=Europe/France
+          ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
+          DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends tzdata
+        shell: bash
+      - name: Install python (setup-python action doesn't work with containers)
+        uses: ./.github/actions/python_install
+      - name: "Setup"
+        id: token
+        run: |
+          pip install jwt requests
+          python ci_tools/setup_check_run.py cuda
+      - name: CUDA Version
+        run: nvcc --version # cuda install check
+      - name: Install dependencies
+        uses: ./.github/actions/linux_install
+      - name: Install Pyccel with tests
+        run: |
+            PATH=${PATH}:$HOME/.local/bin
+            echo "PATH=${PATH}" >> $GITHUB_ENV
+            python -m pip install --upgrade pip
+            python -m pip install --user .[test]
+        shell: bash
+      - name: Coverage install
+        uses: ./.github/actions/coverage_install
+      - name: Ccuda tests with pytest
+        id: cuda_pytest
+        uses: ./.github/actions/pytest_run_cuda
+      - name: Collect coverage information
+        continue-on-error: True
+        uses: ./.github/actions/coverage_collection
+      - name: Save code coverage report
+        uses: actions/upload-artifact@v3
+        with:
+          name: coverage-artifact
+          path: .coverage
+          retention-days: 1
+      - name: "Post completed"
+        if: always()
+        run:
+          python ci_tools/complete_check_run.py ${{ steps.cuda_pytest.outcome }}
+
diff --git a/ci_tools/bot_messages/show_tests.txt b/ci_tools/bot_messages/show_tests.txt
index adc07e8431..eb15492d2e 100644
--- a/ci_tools/bot_messages/show_tests.txt
+++ b/ci_tools/bot_messages/show_tests.txt
@@ -2,6 +2,7 @@ The following is a list of keywords which can be used to run tests. Tests in bol
 - **linux** : Runs the unit tests on a Linux system.
 - **windows** : Runs the unit tests on a Windows system.
 - **macosx** : Runs the unit tests on a MacOS X system.
+- **cuda** : Runs the cuda unit tests on a Linux system.
 - **coverage** : Runs the unit tests on a Linux system and checks the coverage of the tests.
 - **docs** : Checks if the documentation follows the numpydoc format.
 - **pylint** : Runs pylint on files which are too big to be handled by codacy.
diff --git a/ci_tools/bot_tools/bot_funcs.py b/ci_tools/bot_tools/bot_funcs.py
index 7084a01bb9..1621d1d089 100644
--- a/ci_tools/bot_tools/bot_funcs.py
+++ b/ci_tools/bot_tools/bot_funcs.py
@@ -23,7 +23,8 @@
         'pyccel_lint': '3.8',
         'pylint': '3.8',
         'spelling': '3.8',
-        'windows': '3.8'
+        'windows': '3.8',
+        'cuda': '-'
         }
 
 test_names = {
@@ -40,15 +41,16 @@
         'pyccel_lint': "Pyccel best practices",
         'pylint': "Python linting",
         'spelling': "Spelling verification",
-        'windows': "Unit tests on Windows"
+        'windows': "Unit tests on Windows",
+        'cuda': "Unit tests on Linux with cuda"
         }
 
-test_dependencies = {'coverage':['linux']}
+test_dependencies = {'coverage':['linux', 'cuda']}
 
 tests_with_base = ('coverage', 'docs', 'pyccel_lint', 'pylint')
 
 pr_test_keys = ('linux', 'windows', 'macosx', 'coverage', 'docs', 'pylint',
-                'pyccel_lint', 'spelling')
+                'pyccel_lint', 'spelling', 'cuda')
 
 review_stage_labels = ["needs_initial_review", "Ready_for_review", "Ready_to_merge"]
 
@@ -420,7 +422,7 @@ def is_test_required(self, commit_log, name, key, state):
             True if the test should be run, False otherwise.
         """
         print("Checking : ", name, key)
-        if key in ('linux', 'windows', 'macosx', 'anaconda_linux', 'anaconda_windows', 'intel'):
+        if key in ('linux', 'windows', 'macosx', 'anaconda_linux', 'anaconda_windows', 'intel', 'cuda'):
             has_relevant_change = lambda diff: any((f.startswith('pyccel/') or f.startswith('tests/')) #pylint: disable=unnecessary-lambda-assignment
                                                     and f.endswith('.py') and f != 'pyccel/version.py'
                                                     for f in diff)
diff --git a/ci_tools/devel_branch_tests.py b/ci_tools/devel_branch_tests.py
index 1102ef9e92..ec67b6c49a 100644
--- a/ci_tools/devel_branch_tests.py
+++ b/ci_tools/devel_branch_tests.py
@@ -15,3 +15,4 @@
     bot.run_tests(['anaconda_linux'], '3.10', force_run = True)
     bot.run_tests(['anaconda_windows'], '3.10', force_run = True)
     bot.run_tests(['intel'], '3.9', force_run = True)
+    bot.run_tests(['cuda'], '-', force_run = True)
diff --git a/ci_tools/json_pytest_output.py b/ci_tools/json_pytest_output.py
index 409ae76d72..b84f4a4c09 100644
--- a/ci_tools/json_pytest_output.py
+++ b/ci_tools/json_pytest_output.py
@@ -61,7 +61,7 @@ def     mini_md_summary(title, outcome, failed_tests):
     summary = ""
 
     failed_pattern = re.compile(r".*FAILED.*")
-    languages = ('c', 'fortran', 'python')
+    languages = ('c', 'fortran', 'python', 'cuda')
     pattern = {lang: re.compile(r".*\["+lang+r"\]\ \_.*") for lang in languages}
 
     for i in p_args.tests:

From cc3a93ee1ea3df11d914519b8279eecbf853cb9f Mon Sep 17 00:00:00 2001
From: EmilyBourne <louise.bourne@gmail.com>
Date: Mon, 11 Mar 2024 11:41:27 +0100
Subject: [PATCH 024/130] Trigger tests on push to devel or main branch

---
 .github/workflows/deploy.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml
index 9111b47d52..cf52b1c624 100644
--- a/.github/workflows/deploy.yml
+++ b/.github/workflows/deploy.yml
@@ -10,7 +10,7 @@ jobs:
   waitForWorklows:
     name: Wait for workflows
     runs-on: ubuntu-latest
-    if: github.event.workflow_run.head_branch == 'main'
+    if: github.event.workflow_run.head_branch == 'main' && github.repository == 'pyccel/pyccel'
     steps:
       - name: Checkout repository
         uses: actions/checkout@v4

From 3be623da6d5658f13fece4c47e734e1fe40ff6b9 Mon Sep 17 00:00:00 2001
From: bauom <40796259+bauom@users.noreply.github.com>
Date: Wed, 28 Feb 2024 18:11:50 +0100
Subject: [PATCH 025/130] [init] Adding CUDA language/compiler and CodePrinter
 (#32)

This PR aims to make the C code compilable with nvcc. It adds the `cuda` language and a `CudaCodePrinter`.

Changes to stdlib:

Wrapped expressions that use complex types in an `#ifndef __NVCC__` guard so that they are not processed by the nvcc compiler.

---------

Co-authored-by: Mouad Elalj, EmilyBourne
---
 .dict_custom.txt                           |   1 +
 .github/actions/pytest_parallel/action.yml |   4 +-
 .github/actions/pytest_run/action.yml      |   4 +-
 .github/actions/pytest_run_cuda/action.yml |  11 +-
 CHANGELOG.md                               |   6 +
 pyccel/codegen/codegen.py                  |   8 +-
 pyccel/codegen/compiling/compilers.py      |   5 +-
 pyccel/codegen/pipeline.py                 |   5 +-
 pyccel/codegen/printing/cucode.py          |  74 +++++++++++
 pyccel/commands/console.py                 |   2 +-
 pyccel/compilers/default_compilers.py      |  13 +-
 pyccel/naming/__init__.py                  |   4 +-
 pyccel/naming/cudanameclashchecker.py      |  92 ++++++++++++++
 pyccel/stdlib/numpy/numpy_c.c              |   2 +
 pyccel/stdlib/numpy/numpy_c.h              |   2 +
 pytest.ini                                 |   1 +
 tests/conftest.py                          |  11 ++
 tests/epyccel/test_base.py                 | 136 ++++++++++-----------
 18 files changed, 298 insertions(+), 83 deletions(-)
 create mode 100644 pyccel/codegen/printing/cucode.py
 create mode 100644 pyccel/naming/cudanameclashchecker.py

diff --git a/.dict_custom.txt b/.dict_custom.txt
index 82a6b10d31..ae99f31ed4 100644
--- a/.dict_custom.txt
+++ b/.dict_custom.txt
@@ -110,6 +110,7 @@ Valgrind
 variadic
 subclasses
 oneAPI
+Cuda
 getter
 setter
 bitwise
diff --git a/.github/actions/pytest_parallel/action.yml b/.github/actions/pytest_parallel/action.yml
index c7c77d99c7..f91d84915b 100644
--- a/.github/actions/pytest_parallel/action.yml
+++ b/.github/actions/pytest_parallel/action.yml
@@ -10,8 +10,8 @@ runs:
   steps:
     - name: Test with pytest
       run: |
-        mpiexec -n 4 ${MPI_OPTS} python -m pytest epyccel/test_parallel_epyccel.py -v -m parallel -rXx
-        #mpiexec -n 4 ${MPI_OPTS} python -m pytest epyccel -v -m parallel -rXx
+        mpiexec -n 4 ${MPI_OPTS} python -m pytest epyccel/test_parallel_epyccel.py -v -m "parallel and not cuda" -rXx
+        #mpiexec -n 4 ${MPI_OPTS} python -m pytest epyccel -v -m "parallel and not cuda" -rXx
       shell: ${{ inputs.shell_cmd }}
       working-directory: ./tests
 
diff --git a/.github/actions/pytest_run/action.yml b/.github/actions/pytest_run/action.yml
index b0bdc31f16..451fa39e92 100644
--- a/.github/actions/pytest_run/action.yml
+++ b/.github/actions/pytest_run/action.yml
@@ -51,13 +51,13 @@ runs:
       working-directory: ./tests
       id: pytest_3
     - name: Test Fortran translations
-      run: python -m pytest -n auto -rX ${FLAGS} -m "not (parallel or xdist_incompatible) and not (c or python or ccuda) ${{ inputs.pytest_mark }}" --ignore=ndarrays 2>&1 | tee s4_outfile.out
+      run: python -m pytest -n auto -rX ${FLAGS} -m "not (parallel or xdist_incompatible) and not (c or python or cuda) ${{ inputs.pytest_mark }}" --ignore=ndarrays 2>&1 | tee s4_outfile.out
       shell: ${{ inputs.shell_cmd }}
       working-directory: ./tests
       id: pytest_4
     - name: Test multi-file Fortran translations
       run: |
-        python -m pytest -rX ${FLAGS} -m "xdist_incompatible and not parallel and not (c or python or ccuda) ${{ inputs.pytest_mark }}" --ignore=ndarrays 2>&1 | tee s5_outfile.out
+        python -m pytest -rX ${FLAGS} -m "xdist_incompatible and not parallel and not (c or python or cuda) ${{ inputs.pytest_mark }}" --ignore=ndarrays 2>&1 | tee s5_outfile.out
         pyccel-clean
       shell: ${{ inputs.shell_cmd }}
       working-directory: ./tests
diff --git a/.github/actions/pytest_run_cuda/action.yml b/.github/actions/pytest_run_cuda/action.yml
index 52092a6e02..46f90552ed 100644
--- a/.github/actions/pytest_run_cuda/action.yml
+++ b/.github/actions/pytest_run_cuda/action.yml
@@ -1,4 +1,4 @@
-name: 'Pyccel pytest commands generating Ccuda'
+name: 'Pyccel pytest commands generating Cuda'
 inputs:
   shell_cmd:
     description: 'Specifies the shell command (different for anaconda)'
@@ -11,7 +11,14 @@ runs:
     - name: Ccuda tests with pytest
       run: |
         # Catch exit 5 (no tests found)
-        sh -c 'python -m pytest -n auto -rx -m "not (parallel or xdist_incompatible) and ccuda" --ignore=symbolic --ignore=ndarrays; ret=$?; [ $ret = 5 ] && exit 0 || exit $ret'
+        python -m pytest -rX ${FLAGS} -m "not (xdist_incompatible or parallel) and cuda ${{ inputs.pytest_mark }}" --ignore=symbolic --ignore=ndarrays 2>&1 | tee s1_outfile.out
         pyccel-clean
       shell: ${{ inputs.shell_cmd }}
       working-directory: ./tests
+    - name: Final step
+      if: always()
+      id: status
+      run:
+        python ci_tools/json_pytest_output.py -t "Cuda Test Summary" --tests "Cuda tests:${{ steps.pytest_1.outcome }}:tests/s1_outfile.out"
+                      
+      shell: ${{ inputs.shell_cmd }}
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 695dc72cf7..d6928b0eca 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,6 +1,12 @@
 # Change Log
 All notable changes to this project will be documented in this file.
 
+## \[Cuda - UNRELEASED\]
+
+### Added
+
+-   #32 : add support for `nvcc` Compiler and `cuda` language as a possible option.
+
 ## \[UNRELEASED\]
 
 ### Added
diff --git a/pyccel/codegen/codegen.py b/pyccel/codegen/codegen.py
index daf4559df4..8d4abb6bdb 100644
--- a/pyccel/codegen/codegen.py
+++ b/pyccel/codegen/codegen.py
@@ -9,16 +9,18 @@
 from pyccel.codegen.printing.fcode  import FCodePrinter
 from pyccel.codegen.printing.ccode  import CCodePrinter
 from pyccel.codegen.printing.pycode import PythonCodePrinter
+from pyccel.codegen.printing.cucode import CudaCodePrinter
 
 from pyccel.ast.core      import FunctionDef, Interface, ModuleHeader
 from pyccel.utilities.stage import PyccelStage
 
-_extension_registry = {'fortran': 'f90', 'c':'c',  'python':'py'}
-_header_extension_registry = {'fortran': None, 'c':'h',  'python':None}
+_extension_registry = {'fortran': 'f90', 'c':'c',  'python':'py', 'cuda':'cu'}
+_header_extension_registry = {'fortran': None, 'c':'h',  'python':None, 'cuda':'h'}
 printer_registry    = {
                         'fortran':FCodePrinter,
                         'c':CCodePrinter,
-                        'python':PythonCodePrinter
+                        'python':PythonCodePrinter,
+                        'cuda':CudaCodePrinter
                       }
 
 pyccel_stage = PyccelStage()
diff --git a/pyccel/codegen/compiling/compilers.py b/pyccel/codegen/compiling/compilers.py
index c866ee5b1a..d909a5036e 100644
--- a/pyccel/codegen/compiling/compilers.py
+++ b/pyccel/codegen/compiling/compilers.py
@@ -444,7 +444,10 @@ def compile_shared_library(self, compile_obj, output_folder, verbose = False, sh
         # Collect compile information
         exec_cmd, includes, libs_flags, libdirs_flags, m_code = \
                 self._get_compile_components(compile_obj, accelerators)
-        linker_libdirs_flags = ['-Wl,-rpath' if l == '-L' else l for l in libdirs_flags]
+        if self._info['exec'] == 'nvcc':
+            linker_libdirs_flags = ['-Xcompiler' if l == '-L' else f'"-Wl,-rpath,{l}"' for l in libdirs_flags]
+        else:
+            linker_libdirs_flags = ['-Wl,-rpath' if l == '-L' else l for l in libdirs_flags]
 
         flags.insert(0,"-shared")
 
diff --git a/pyccel/codegen/pipeline.py b/pyccel/codegen/pipeline.py
index 14087fb567..eb357fab74 100644
--- a/pyccel/codegen/pipeline.py
+++ b/pyccel/codegen/pipeline.py
@@ -180,9 +180,10 @@ def handle_error(stage):
     if language is None:
         language = 'fortran'
 
-    # Choose Fortran compiler
+    # Choose Default compiler
     if compiler is None:
-        compiler = os.environ.get('PYCCEL_DEFAULT_COMPILER', 'GNU')
+        default_compiler_family = 'nvidia' if language == 'cuda' else 'GNU'
+        compiler = os.environ.get('PYCCEL_DEFAULT_COMPILER', default_compiler_family)
 
     fflags = [] if fflags is None else fflags.split()
     wrapper_flags = [] if wrapper_flags is None else wrapper_flags.split()
diff --git a/pyccel/codegen/printing/cucode.py b/pyccel/codegen/printing/cucode.py
new file mode 100644
index 0000000000..86146b065b
--- /dev/null
+++ b/pyccel/codegen/printing/cucode.py
@@ -0,0 +1,74 @@
+# coding: utf-8
+#------------------------------------------------------------------------------------------#
+# This file is part of Pyccel which is released under MIT License. See the LICENSE file or #
+# go to https://github.com/pyccel/pyccel/blob/master/LICENSE for full license details.     #
+#------------------------------------------------------------------------------------------#
+"""
+Provide tools for generating and handling CUDA code.
+This module is designed to interface Pyccel's Abstract Syntax Tree (AST) with CUDA,
+enabling the direct translation of high-level Pyccel expressions into CUDA code.
+"""
+
+from pyccel.codegen.printing.ccode import CCodePrinter, c_library_headers
+
+from pyccel.ast.core        import Import, Module
+
+from pyccel.errors.errors   import Errors
+
+
+errors = Errors()
+
+__all__ = ["CudaCodePrinter"]
+
+class CudaCodePrinter(CCodePrinter):
+    """
+    Print code in CUDA format.
+
+    This printer converts Pyccel's Abstract Syntax Tree (AST) into strings of CUDA code.
+    Navigation through this file utilizes _print_X functions,
+    as is common with all printers.
+
+    Parameters
+    ----------
+    filename : str
+            The name of the file being pyccelised.
+    prefix_module : str, optional
+            A prefix to be added to the name of the module.
+    """
+    language = "cuda"
+
+    def __init__(self, filename, prefix_module = None):
+        # NOTE(review): prefix_module is accepted but not passed on to the parent constructor - confirm this is intended.
+        errors.set_target(filename)
+
+        super().__init__(filename)
+
+    def _print_Module(self, expr):
+        self.set_scope(expr.scope)
+        self._current_module = expr.name
+        body = ''.join(self._print(i) for i in expr.body)
+
+        global_variables = ''.join(self._print(d) for d in expr.declarations)
+
+        # Print imports last to be sure that all additional_imports have been collected
+        imports = [Import(expr.name, Module(expr.name,(),())), *self._additional_imports.values()]
+        c_headers_imports = ''
+        local_imports = ''
+
+        for imp in imports:
+            if imp.source in c_library_headers:
+                c_headers_imports += self._print(imp)
+            else:
+                local_imports += self._print(imp)
+
+        imports = f'{c_headers_imports}\
+                    extern "C"{{\n\
+                    {local_imports}\
+                    }}'
+
+        code = f'{imports}\n\
+                 {global_variables}\n\
+                 {body}\n'
+
+        self.exit_scope()
+        return code
diff --git a/pyccel/commands/console.py b/pyccel/commands/console.py
index 596c440ec0..fcbec009de 100644
--- a/pyccel/commands/console.py
+++ b/pyccel/commands/console.py
@@ -80,7 +80,7 @@ def pyccel(files=None, mpi=None, openmp=None, openacc=None, output_dir=None, com
     # ... backend compiler options
     group = parser.add_argument_group('Backend compiler options')
 
-    group.add_argument('--language', choices=('fortran', 'c', 'python'), help='Generated language')
+    group.add_argument('--language', choices=('fortran', 'c', 'python', 'cuda'), help='Generated language')
 
     group.add_argument('--compiler', help='Compiler family or json file containing a compiler description {GNU,intel,PGI}')
 
diff --git a/pyccel/compilers/default_compilers.py b/pyccel/compilers/default_compilers.py
index 166085d22e..d47856773c 100644
--- a/pyccel/compilers/default_compilers.py
+++ b/pyccel/compilers/default_compilers.py
@@ -185,6 +185,15 @@
                 },
             'family': 'nvidia',
             }
+#------------------------------------------------------------
+nvcc_info = {'exec'         : 'nvcc',
+             'language'     : 'cuda',
+             'debug_flags'  : ("-g",),
+             'release_flags': ("-O3",),
+             'general_flags': ('--compiler-options', '-fPIC',),
+             'family'       : 'nvidia'
+            }
+
 
 #------------------------------------------------------------
 def change_to_lib_flag(lib):
@@ -288,6 +297,7 @@ def change_to_lib_flag(lib):
 pgfortran_info.update(python_info)
 nvc_info.update(python_info)
 nvfort_info.update(python_info)
+nvcc_info.update(python_info)
 
 available_compilers = {('GNU', 'c') : gcc_info,
                        ('GNU', 'fortran') : gfort_info,
@@ -296,6 +306,7 @@ def change_to_lib_flag(lib):
                        ('PGI', 'c') : pgcc_info,
                        ('PGI', 'fortran') : pgfortran_info,
                        ('nvidia', 'c') : nvc_info,
-                       ('nvidia', 'fortran') : nvfort_info}
+                       ('nvidia', 'fortran') : nvfort_info,
+                       ('nvidia', 'cuda'): nvcc_info}
 
 vendors = ('GNU','intel','PGI','nvidia')
diff --git a/pyccel/naming/__init__.py b/pyccel/naming/__init__.py
index 72c318d3ad..b3e4bbbe0e 100644
--- a/pyccel/naming/__init__.py
+++ b/pyccel/naming/__init__.py
@@ -10,7 +10,9 @@
 from .fortrannameclashchecker import FortranNameClashChecker
 from .cnameclashchecker import CNameClashChecker
 from .pythonnameclashchecker import PythonNameClashChecker
+from .cudanameclashchecker import CudaNameClashChecker
 
 name_clash_checkers = {'fortran':FortranNameClashChecker(),
         'c':CNameClashChecker(),
-        'python':PythonNameClashChecker()}
+        'python':PythonNameClashChecker(),
+        'cuda':CudaNameClashChecker()}
diff --git a/pyccel/naming/cudanameclashchecker.py b/pyccel/naming/cudanameclashchecker.py
new file mode 100644
index 0000000000..971204e912
--- /dev/null
+++ b/pyccel/naming/cudanameclashchecker.py
@@ -0,0 +1,92 @@
+# coding: utf-8
+#------------------------------------------------------------------------------------------#
+# This file is part of Pyccel which is released under MIT License. See the LICENSE file or #
+# go to https://github.com/pyccel/pyccel/blob/master/LICENSE for full license details.     #
+#------------------------------------------------------------------------------------------#
+"""
+Handles name clash problems in Cuda
+"""
+from .languagenameclashchecker import LanguageNameClashChecker
+
+class CudaNameClashChecker(LanguageNameClashChecker):
+    """
+    Class containing functions to help avoid problematic names in Cuda.
+
+    A class which provides functionalities to check or propose variable names and
+    verify that they do not cause name clashes. Name clashes may be due to
+    new variables, or due to the use of reserved keywords.
+    """
+    # Keywords as mentioned on https://en.cppreference.com/w/c/keyword
+    keywords = set(['isign', 'fsign', 'csign', 'auto', 'break', 'case', 'char', 'const',
+        'continue', 'default', 'do', 'double', 'else', 'enum',
+        'extern', 'float', 'for', 'goto', 'if', 'inline', 'int',
+        'long', 'register', 'restrict', 'return', 'short', 'signed',
+        'sizeof', 'static', 'struct', 'switch', 'typedef', 'union',
+        'unsigned', 'void', 'volatile', 'while', '_Alignas',
+        '_Alignof', '_Atomic', '_Bool', '_Complex', '_Decimal128',
+        '_Decimal32', '_Decimal64', '_Generic', '_Imaginary',
+        '_Noreturn', '_Static_assert', '_Thread_local', 't_ndarray',
+        'array_create', 'new_slice', 'array_slicing', 'alias_assign',
+        'transpose_alias_assign', 'array_fill', 't_slice',
+        'GET_INDEX_EXP1', 'GET_INDEX_EXP2',
+        'GET_INDEX_EXP3', 'GET_INDEX_EXP4', 'GET_INDEX_EXP5',
+        'GET_INDEX_EXP6', 'GET_INDEX_EXP7', 'GET_INDEX_EXP8',
+        'GET_INDEX_EXP9', 'GET_INDEX_EXP10', 'GET_INDEX_EXP11',
+        'GET_INDEX_EXP12', 'GET_INDEX_EXP13', 'GET_INDEX_EXP14',
+        'GET_INDEX_EXP15', 'NUM_ARGS_H1', 'NUM_ARGS',
+        'GET_INDEX_FUNC_H2', 'GET_INDEX_FUNC', 'GET_INDEX',
+        'INDEX', 'GET_ELEMENT', 'free_array', 'free_pointer',
+        'get_index', 'numpy_to_ndarray_strides',
+        'numpy_to_ndarray_shape', 'get_size', 'order_f', 'order_c', 'array_copy_data'])
+
+    def has_clash(self, name, symbols):
+        """
+        Indicate whether the proposed name causes any clashes.
+
+        Checks if a suggested name conflicts with predefined
+        keywords or specified symbols, returning true for a clash.
+        This method is crucial for maintaining namespace integrity and
+        preventing naming conflicts in code generation processes.
+
+        Parameters
+        ----------
+        name : str
+            The suggested name.
+        symbols : set
+            Symbols which should be considered as collisions.
+
+        Returns
+        -------
+        bool
+            True if the name is a collision.
+            False if the name is collision free.
+        """
+        return any(name == k for k in self.keywords) or \
+               any(name == s for s in symbols)
+
+    def get_collisionless_name(self, name, symbols):
+        """
+        Get a valid name which doesn't collide with symbols or Cuda keywords.
+
+        Find a new name based on the suggested name which will not cause
+        conflicts with Cuda keywords, does not appear in the provided symbols,
+        and is a valid name in Cuda code.
+
+        Parameters
+        ----------
+        name : str
+            The suggested name.
+        symbols : set
+            Symbols which should be considered as collisions.
+
+        Returns
+        -------
+        str
+            A new name which is collision free.
+        """
+        if len(name)>4 and all(name[i] == '_' for i in (0,1,-1,-2)):
+            # Ignore magic methods
+            return name
+        if name[0] == '_':
+            name = 'private'+name
+        return self._get_collisionless_name(name, symbols)
diff --git a/pyccel/stdlib/numpy/numpy_c.c b/pyccel/stdlib/numpy/numpy_c.c
index 7c9ecbbf6b..bc56214772 100644
--- a/pyccel/stdlib/numpy/numpy_c.c
+++ b/pyccel/stdlib/numpy/numpy_c.c
@@ -17,8 +17,10 @@ double  fsign(double x)
     return SIGN(x);
 }
 
+#ifndef __NVCC__
 /* numpy.sign for complex */
 double complex csign(double complex x)
 {
     return x ? ((!creal(x) && cimag(x) < 0) || (creal(x) < 0) ? -1 : 1) : 0;
 }
+#endif
diff --git a/pyccel/stdlib/numpy/numpy_c.h b/pyccel/stdlib/numpy/numpy_c.h
index e72cf3ad57..c2a16a5516 100644
--- a/pyccel/stdlib/numpy/numpy_c.h
+++ b/pyccel/stdlib/numpy/numpy_c.h
@@ -15,6 +15,8 @@
 
 long long int isign(long long int x);
 double fsign(double x);
+#ifndef __NVCC__
 double complex csign(double complex x);
+#endif
 
 #endif
diff --git a/pytest.ini b/pytest.ini
index 42eb0d72ba..3792ab65f9 100644
--- a/pytest.ini
+++ b/pytest.ini
@@ -9,3 +9,4 @@ markers =
     python: test to generate python code
     xdist_incompatible: test which compiles a file also compiled by another test
     external: test using an external dll (problematic with conda on Windows)
+    cuda: test to generate cuda code
diff --git a/tests/conftest.py b/tests/conftest.py
index 79144b6978..a5082ef6e8 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -21,6 +21,17 @@
 def language(request):
     return request.param
 
+@pytest.fixture( params=[
+        pytest.param("fortran", marks = pytest.mark.fortran),
+        pytest.param("c", marks = pytest.mark.c),
+        pytest.param("python", marks = pytest.mark.python),
+        pytest.param("cuda", marks = pytest.mark.cuda)
+    ],
+    scope = "session"
+)
+def language_with_cuda(request):
+    return request.param
+
 def move_coverage(path_dir):
     for root, _, files in os.walk(path_dir):
         for name in files:
diff --git a/tests/epyccel/test_base.py b/tests/epyccel/test_base.py
index c22064d321..413f79eef1 100644
--- a/tests/epyccel/test_base.py
+++ b/tests/epyccel/test_base.py
@@ -7,128 +7,128 @@
 from utilities import epyccel_test
 
 
-def test_is_false(language):
-    test = epyccel_test(base.is_false, lang=language)
+def test_is_false(language_with_cuda):
+    test = epyccel_test(base.is_false, lang=language_with_cuda)
     test.compare_epyccel( True )
     test.compare_epyccel( False )
 
-def test_is_true(language):
-    test = epyccel_test(base.is_true, lang=language)
+def test_is_true(language_with_cuda):
+    test = epyccel_test(base.is_true, lang=language_with_cuda)
     test.compare_epyccel( True )
     test.compare_epyccel( False )
 
-def test_compare_is(language):
-    test = epyccel_test(base.compare_is, lang=language)
+def test_compare_is(language_with_cuda):
+    test = epyccel_test(base.compare_is, lang=language_with_cuda)
     test.compare_epyccel( True, True )
     test.compare_epyccel( True, False )
     test.compare_epyccel( False, True )
     test.compare_epyccel( False, False )
 
-def test_compare_is_not(language):
-    test = epyccel_test(base.compare_is_not, lang=language)
+def test_compare_is_not(language_with_cuda):
+    test = epyccel_test(base.compare_is_not, lang=language_with_cuda)
     test.compare_epyccel( True, True )
     test.compare_epyccel( True, False )
     test.compare_epyccel( False, True )
     test.compare_epyccel( False, False )
 
-def test_compare_is_int(language):
-    test = epyccel_test(base.compare_is_int, lang=language)
+def test_compare_is_int(language_with_cuda):
+    test = epyccel_test(base.compare_is_int, lang=language_with_cuda)
     test.compare_epyccel( True, 1 )
     test.compare_epyccel( True, 0 )
     test.compare_epyccel( False, 1 )
     test.compare_epyccel( False, 0 )
 
-def test_compare_is_not_int(language):
-    test = epyccel_test(base.compare_is_not_int, lang=language)
+def test_compare_is_not_int(language_with_cuda):
+    test = epyccel_test(base.compare_is_not_int, lang=language_with_cuda)
     test.compare_epyccel( True, 1 )
     test.compare_epyccel( True, 0 )
     test.compare_epyccel( False, 1 )
     test.compare_epyccel( False, 0 )
 
-def test_not_false(language):
-    test = epyccel_test(base.not_false, lang=language)
+def test_not_false(language_with_cuda):
+    test = epyccel_test(base.not_false, lang=language_with_cuda)
     test.compare_epyccel( True )
     test.compare_epyccel( False )
 
-def test_not_true(language):
-    test = epyccel_test(base.not_true, lang=language)
+def test_not_true(language_with_cuda):
+    test = epyccel_test(base.not_true, lang=language_with_cuda)
     test.compare_epyccel( True )
     test.compare_epyccel( False )
 
-def test_eq_false(language):
-    test = epyccel_test(base.eq_false, lang=language)
+def test_eq_false(language_with_cuda):
+    test = epyccel_test(base.eq_false, lang=language_with_cuda)
     test.compare_epyccel( True )
     test.compare_epyccel( False )
 
-def test_eq_true(language):
-    test = epyccel_test(base.eq_true, lang=language)
+def test_eq_true(language_with_cuda):
+    test = epyccel_test(base.eq_true, lang=language_with_cuda)
     test.compare_epyccel( True )
     test.compare_epyccel( False )
 
-def test_neq_false(language):
-    test = epyccel_test(base.eq_false, lang=language)
+def test_neq_false(language_with_cuda):
+    test = epyccel_test(base.eq_false, lang=language_with_cuda)
     test.compare_epyccel( True )
     test.compare_epyccel( False )
 
-def test_neq_true(language):
-    test = epyccel_test(base.eq_true, lang=language)
+def test_neq_true(language_with_cuda):
+    test = epyccel_test(base.eq_true, lang=language_with_cuda)
     test.compare_epyccel( True )
     test.compare_epyccel( False )
 
-def test_not(language):
-    test = epyccel_test(base.not_val, lang=language)
+def test_not(language_with_cuda):
+    test = epyccel_test(base.not_val, lang=language_with_cuda)
     test.compare_epyccel( True )
     test.compare_epyccel( False )
 
-def test_not_int(language):
-    test = epyccel_test(base.not_int, lang=language)
+def test_not_int(language_with_cuda):
+    test = epyccel_test(base.not_int, lang=language_with_cuda)
     test.compare_epyccel( 0 )
     test.compare_epyccel( 4 )
 
-def test_compare_is_nil(language):
-    test = epyccel_test(base.is_nil, lang=language)
+def test_compare_is_nil(language_with_cuda):
+    test = epyccel_test(base.is_nil, lang=language_with_cuda)
     test.compare_epyccel( None )
 
-def test_compare_is_not_nil(language):
-    test = epyccel_test(base.is_not_nil, lang=language)
+def test_compare_is_not_nil(language_with_cuda):
+    test = epyccel_test(base.is_not_nil, lang=language_with_cuda)
     test.compare_epyccel( None )
 
-def test_cast_int(language):
-    test = epyccel_test(base.cast_int, lang=language)
+def test_cast_int(language_with_cuda):
+    test = epyccel_test(base.cast_int, lang=language_with_cuda)
     test.compare_epyccel( 4 )
-    test = epyccel_test(base.cast_float_to_int, lang=language)
+    test = epyccel_test(base.cast_float_to_int, lang=language_with_cuda)
     test.compare_epyccel( 4.5 )
 
-def test_cast_bool(language):
-    test = epyccel_test(base.cast_bool, lang=language)
+def test_cast_bool(language_with_cuda):
+    test = epyccel_test(base.cast_bool, lang=language_with_cuda)
     test.compare_epyccel( True )
 
-def test_cast_float(language):
-    test = epyccel_test(base.cast_float, lang=language)
+def test_cast_float(language_with_cuda):
+    test = epyccel_test(base.cast_float, lang=language_with_cuda)
     test.compare_epyccel( 4.5 )
-    test = epyccel_test(base.cast_int_to_float, lang=language)
+    test = epyccel_test(base.cast_int_to_float, lang=language_with_cuda)
     test.compare_epyccel( 4 )
 
-def test_if_0_int(language):
-    test = epyccel_test(base.if_0_int, lang=language)
+def test_if_0_int(language_with_cuda):
+    test = epyccel_test(base.if_0_int, lang=language_with_cuda)
     test.compare_epyccel( 22 )
     test.compare_epyccel( 0 )
 
-def test_if_0_real(language):
-    test = epyccel_test(base.if_0_real, lang=language)
+def test_if_0_real(language_with_cuda):
+    test = epyccel_test(base.if_0_real, lang=language_with_cuda)
     test.compare_epyccel( 22.3 )
     test.compare_epyccel( 0.0 )
 
-def test_same_int(language):
-    test = epyccel_test(base.is_same_int, lang=language)
+def test_same_int(language_with_cuda):
+    test = epyccel_test(base.is_same_int, lang=language_with_cuda)
     test.compare_epyccel( 22 )
-    test = epyccel_test(base.isnot_same_int, lang=language)
+    test = epyccel_test(base.isnot_same_int, lang=language_with_cuda)
     test.compare_epyccel( 22 )
 
-def test_same_float(language):
-    test = epyccel_test(base.is_same_float, lang=language)
+def test_same_float(language_with_cuda):
+    test = epyccel_test(base.is_same_float, lang=language_with_cuda)
     test.compare_epyccel( 22.2 )
-    test = epyccel_test(base.isnot_same_float, lang=language)
+    test = epyccel_test(base.isnot_same_float, lang=language_with_cuda)
     test.compare_epyccel( 22.2 )
 
 @pytest.mark.parametrize( 'language', [
@@ -150,28 +150,28 @@ def test_same_complex(language):
     test = epyccel_test(base.isnot_same_complex, lang=language)
     test.compare_epyccel( complex(2,3) )
 
-def test_is_types(language):
-    test = epyccel_test(base.is_types, lang=language)
+def test_is_types(language_with_cuda):
+    test = epyccel_test(base.is_types, lang=language_with_cuda)
     test.compare_epyccel( 1, 1.0 )
 
-def test_isnot_types(language):
-    test = epyccel_test(base.isnot_types, lang=language)
+def test_isnot_types(language_with_cuda):
+    test = epyccel_test(base.isnot_types, lang=language_with_cuda)
     test.compare_epyccel( 1, 1.0 )
 
-def test_none_is_none(language):
-    test = epyccel_test(base.none_is_none, lang=language)
+def test_none_is_none(language_with_cuda):
+    test = epyccel_test(base.none_is_none, lang=language_with_cuda)
     test.compare_epyccel()
 
-def test_none_isnot_none(language):
-    test = epyccel_test(base.none_isnot_none, lang=language)
+def test_none_isnot_none(language_with_cuda):
+    test = epyccel_test(base.none_isnot_none, lang=language_with_cuda)
     test.compare_epyccel()
 
-def test_pass_if(language):
-    test = epyccel_test(base.pass_if, lang=language)
+def test_pass_if(language_with_cuda):
+    test = epyccel_test(base.pass_if, lang=language_with_cuda)
     test.compare_epyccel(2)
 
-def test_pass2_if(language):
-    test = epyccel_test(base.pass2_if, lang=language)
+def test_pass2_if(language_with_cuda):
+    test = epyccel_test(base.pass2_if, lang=language_with_cuda)
     test.compare_epyccel(0.2)
     test.compare_epyccel(0.0)
 
@@ -192,15 +192,15 @@ def test_use_optional(language):
     test.compare_epyccel()
     test.compare_epyccel(6)
 
-def test_none_equality(language):
-    test = epyccel_test(base.none_equality, lang=language)
+def test_none_equality(language_with_cuda):
+    test = epyccel_test(base.none_equality, lang=language_with_cuda)
     test.compare_epyccel()
     test.compare_epyccel(6)
 
-def test_none_none_equality(language):
-    test = epyccel_test(base.none_none_equality, lang=language)
+def test_none_none_equality(language_with_cuda):
+    test = epyccel_test(base.none_none_equality, lang=language_with_cuda)
     test.compare_epyccel()
 
-def test_none_literal_equality(language):
-    test = epyccel_test(base.none_literal_equality, lang=language)
+def test_none_literal_equality(language_with_cuda):
+    test = epyccel_test(base.none_literal_equality, lang=language_with_cuda)
     test.compare_epyccel()

From b6d1549c8cb1999f76396d4919e98998d3367c55 Mon Sep 17 00:00:00 2001
From: Said Mazouz <95222894+smazouz42@users.noreply.github.com>
Date: Wed, 15 May 2024 12:58:50 +0100
Subject: [PATCH 026/130] Fix import handling (#49)

This pull request fixes https://github.com/pyccel/pyccel-cuda/issues/48 by implementing a small CUDA-to-C wrapper and by wrapping only the non-CUDA functions in `extern "C"`.

**Commit Summary**

-    Implemented a new header printer for CUDA.
-    Added the CUDA wrapper (`CudaToCWrapper`) to the Python wrapping step.
-    Instead of wrapping all local headers, wrap only C functions in `extern "C"`.

---------

Co-authored-by: EmilyBourne <louise.bourne@gmail.com>
Co-authored-by: bauom <40796259+bauom@users.noreply.github.com>
---
 CHANGELOG.md                                |  3 +-
 pyccel/codegen/printing/cucode.py           | 45 ++++++++----
 pyccel/codegen/python_wrapper.py            |  4 ++
 pyccel/codegen/wrapper/cuda_to_c_wrapper.py | 78 +++++++++++++++++++++
 tests/epyccel/modules/cuda_module.py        | 13 ++++
 tests/epyccel/test_epyccel_modules.py       | 13 ++++
 6 files changed, 142 insertions(+), 14 deletions(-)
 create mode 100644 pyccel/codegen/wrapper/cuda_to_c_wrapper.py
 create mode 100644 tests/epyccel/modules/cuda_module.py

diff --git a/CHANGELOG.md b/CHANGELOG.md
index d6928b0eca..b897e14385 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,7 +5,8 @@ All notable changes to this project will be documented in this file.
 
 ### Added
 
--   #32 : add support for `nvcc` Compiler and `cuda` language as a possible option.
+-   #32 : Add support for `nvcc` Compiler and `cuda` language as a possible option.
+-   #48 : Fix incorrect handling of imports in `cuda`.
 
 ## \[UNRELEASED\]
 
diff --git a/pyccel/codegen/printing/cucode.py b/pyccel/codegen/printing/cucode.py
index 86146b065b..277d2a3a6a 100644
--- a/pyccel/codegen/printing/cucode.py
+++ b/pyccel/codegen/printing/cucode.py
@@ -52,19 +52,7 @@ def _print_Module(self, expr):
 
         # Print imports last to be sure that all additional_imports have been collected
         imports = [Import(expr.name, Module(expr.name,(),())), *self._additional_imports.values()]
-        c_headers_imports = ''
-        local_imports = ''
-
-        for imp in imports:
-            if imp.source in c_library_headers:
-                c_headers_imports += self._print(imp)
-            else:
-                local_imports += self._print(imp)
-
-        imports = f'{c_headers_imports}\
-                    extern "C"{{\n\
-                    {local_imports}\
-                    }}'
+        imports = ''.join(self._print(i) for i in imports)
 
         code = f'{imports}\n\
                  {global_variables}\n\
@@ -72,3 +60,34 @@ def _print_Module(self, expr):
 
         self.exit_scope()
         return code
+
+    def _print_ModuleHeader(self, expr):
+        self.set_scope(expr.module.scope)
+        self._in_header = True
+        name = expr.module.name
+        # Kernel prototypes are emitted outside extern "C"; all other non-inline functions go inside it.
+        funcs = ""
+        cuda_headers = ""
+        for f in expr.module.funcs:
+            if not f.is_inline:
+                if 'kernel' in f.decorators:  # Checking for 'kernel' decorator
+                    cuda_headers += self.function_signature(f) + ';\n'
+                else:
+                    funcs += self.function_signature(f) + ';\n'
+        global_variables = ''.join('extern '+self._print(d) for d in expr.module.declarations if not d.variable.is_private)
+        # Print imports last to be sure that all additional_imports have been collected
+        imports = [*expr.module.imports, *self._additional_imports.values()]
+        imports = ''.join(self._print(i) for i in imports)
+        # NOTE(review): `imports` is printed here but is not part of the header text returned below.
+        self._in_header = False
+        self.exit_scope()
+        function_declaration = f'{cuda_headers}\n\
+                    extern "C"{{\n\
+                    {funcs}\
+                    }}\n'
+        return '\n'.join((f"#ifndef {name.upper()}_H",
+                          f"#define {name.upper()}_H",
+                          global_variables,
+                          function_declaration,
+                          f"#endif // {name.upper()}_H\n"))  # f-string so the guard name is interpolated
+
diff --git a/pyccel/codegen/python_wrapper.py b/pyccel/codegen/python_wrapper.py
index 9437727042..62c303fa64 100644
--- a/pyccel/codegen/python_wrapper.py
+++ b/pyccel/codegen/python_wrapper.py
@@ -13,6 +13,7 @@
 from pyccel.codegen.printing.fcode               import FCodePrinter
 from pyccel.codegen.wrapper.fortran_to_c_wrapper import FortranToCWrapper
 from pyccel.codegen.wrapper.c_to_python_wrapper  import CToPythonWrapper
+from pyccel.codegen.wrapper.cuda_to_c_wrapper    import CudaToCWrapper
 from pyccel.codegen.utilities                    import recompile_object
 from pyccel.codegen.utilities                    import copy_internal_library
 from pyccel.codegen.utilities                    import internal_libs
@@ -144,6 +145,9 @@ def create_shared_library(codegen,
                 verbose=verbose)
         timings['Bind C wrapping'] = time.time() - start_bind_c_compiling
         c_ast = bind_c_mod
+    elif language == 'cuda':
+        wrapper = CudaToCWrapper()
+        c_ast = wrapper.wrap(codegen.ast)
     else:
         c_ast = codegen.ast
 
diff --git a/pyccel/codegen/wrapper/cuda_to_c_wrapper.py b/pyccel/codegen/wrapper/cuda_to_c_wrapper.py
new file mode 100644
index 0000000000..c0e24c7c09
--- /dev/null
+++ b/pyccel/codegen/wrapper/cuda_to_c_wrapper.py
@@ -0,0 +1,78 @@
+# coding: utf-8
+#------------------------------------------------------------------------------------------#
+# This file is part of Pyccel which is released under MIT License. See the LICENSE file or #
+# go to https://github.com/pyccel/pyccel/blob/master/LICENSE for full license details.     #
+#------------------------------------------------------------------------------------------#
+"""
+Module describing the code-wrapping class : CudaToCWrapper
+which creates an interface exposing Cuda code to C.
+"""
+
+from pyccel.ast.bind_c      import BindCModule
+from pyccel.errors.errors   import Errors
+from pyccel.ast.bind_c      import BindCVariable
+from .wrapper               import Wrapper
+
+errors = Errors()
+
+class CudaToCWrapper(Wrapper):
+    """
+    Class for creating a wrapper exposing Cuda code to C.
+
+    CUDA is typically compatible with C by default, but this wrapper
+    becomes necessary in scenarios where specific adaptations or
+    modifications are required to ensure seamless integration with C.
+    """
+
+    def _wrap_Module(self, expr):
+        """
+        Create a Module which is compatible with C.
+
+        Create a Module which provides an interface between C and the
+        Module described by expr.
+
+        Parameters
+        ----------
+        expr : pyccel.ast.core.Module
+            The module to be wrapped.
+
+        Returns
+        -------
+        pyccel.ast.bind_c.BindCModule
+            The C-compatible module.
+        """
+        init_func = expr.init_func
+        if expr.interfaces:
+            errors.report("Interface wrapping is not yet supported for Cuda",
+                      severity='warning', symbol=expr)
+        if expr.classes:
+            errors.report("Class wrapping is not yet supported for Cuda",
+                      severity='warning', symbol=expr)
+
+        variables = [self._wrap(v) for v in expr.variables]
+
+        return BindCModule(expr.name, variables, expr.funcs,
+                init_func=init_func,
+                scope = expr.scope,
+                original_module=expr)
+
+    def _wrap_Variable(self, expr):
+        """
+        Create all objects necessary to expose a module variable to C.
+
+        Create and return the objects which must be printed in the wrapping
+        module in order to expose the variable to C.
+
+        Parameters
+        ----------
+        expr : pyccel.ast.variables.Variable
+            The module variable.
+
+        Returns
+        -------
+        pyccel.ast.bind_c.BindCVariable
+            The C-compatible variable which must be printed in
+            the wrapping module to expose the variable.
+        """
+        return expr.clone(expr.name, new_class = BindCVariable)
+
diff --git a/tests/epyccel/modules/cuda_module.py b/tests/epyccel/modules/cuda_module.py
new file mode 100644
index 0000000000..bb7ae6b98a
--- /dev/null
+++ b/tests/epyccel/modules/cuda_module.py
@@ -0,0 +1,13 @@
+# pylint: disable=missing-function-docstring, missing-module-docstring
+import numpy as np
+
+g = np.float64(9.81)
+r0 = np.float32(1.0)
+rmin = 0.01
+rmax = 1.0
+
+skip_centre = True
+
+method = 3
+
+tiny = np.int32(4)
diff --git a/tests/epyccel/test_epyccel_modules.py b/tests/epyccel/test_epyccel_modules.py
index ad8ae0bd75..223f741bf0 100644
--- a/tests/epyccel/test_epyccel_modules.py
+++ b/tests/epyccel/test_epyccel_modules.py
@@ -200,3 +200,16 @@ def test_awkward_names(language):
     assert mod.function() == modnew.function()
     assert mod.pure() == modnew.pure()
     assert mod.allocate(1) == modnew.allocate(1)
+
+def test_cuda_module(language_with_cuda):
+    import modules.cuda_module as mod
+
+    modnew = epyccel(mod, language=language_with_cuda)
+
+    atts = ('g', 'r0', 'rmin', 'rmax', 'skip_centre',
+            'method', 'tiny')
+    for att in atts:
+        mod_att = getattr(mod, att)
+        modnew_att = getattr(modnew, att)
+        assert mod_att == modnew_att
+        assert type(mod_att) is type(modnew_att)

From 7da772a5096082d6268c6baf50cf2fc56c5d6152 Mon Sep 17 00:00:00 2001
From: Said Mazouz <95222894+smazouz42@users.noreply.github.com>
Date: Thu, 27 Jun 2024 20:31:46 +0100
Subject: [PATCH 027/130] Add support for kernels (#42)

This pull request addresses issue #28 by implementing a new feature in
Pyccel that allows users to define custom GPU kernels. The syntax for
creating these kernels is inspired by Numba. It also fixes issue #45,
which is needed for testing purposes.

**Commit Summary**

- Introduced the `KernelCall` class.
- Added the CUDA printer methods `_print_KernelCall` and `_print_FunctionDef` to
generate the corresponding CUDA representation for both kernel calls and
definitions.
- Added `IndexedFunctionCall`, which represents an indexed function call.
- Added the CUDA module and `cuda.synchronize()`.
- Fixed a bug in the generated header: it did not import the
headers needed by the functions it uses.

---------

Co-authored-by: EmilyBourne <louise.bourne@gmail.com>
Co-authored-by: bauom <40796259+bauom@users.noreply.github.com>
Co-authored-by: Emily Bourne <emily.bourne@epfl.ch>
---
 .dict_custom.txt                              |   1 +
 CHANGELOG.md                                  |   2 +
 docs/cuda.md                                  |  23 +++
 pyccel/ast/core.py                            |  37 ++++
 pyccel/ast/cuda.py                            |  65 +++++++
 pyccel/ast/cudaext.py                         |  42 +++++
 pyccel/ast/utilities.py                       |   4 +-
 pyccel/codegen/printing/cucode.py             |  46 ++++-
 pyccel/cuda/__init__.py                       |  10 +
 pyccel/cuda/cuda_sync_primitives.py           |  16 ++
 pyccel/decorators.py                          |  32 ++++
 pyccel/errors/messages.py                     |   8 +
 pyccel/parser/semantic.py                     |  84 ++++++++-
 pyccel/parser/syntactic.py                    |   4 +
 tests/conftest.py                             |   9 +
 tests/cuda/test_kernel_semantic.py            | 176 ++++++++++++++++++
 tests/pyccel/scripts/kernel/hello_kernel.py   |  19 ++
 .../scripts/kernel/kernel_name_collision.py   |   8 +
 tests/pyccel/test_pyccel.py                   |  22 ++-
 19 files changed, 599 insertions(+), 9 deletions(-)
 create mode 100644 docs/cuda.md
 create mode 100644 pyccel/ast/cuda.py
 create mode 100644 pyccel/ast/cudaext.py
 create mode 100644 pyccel/cuda/__init__.py
 create mode 100644 pyccel/cuda/cuda_sync_primitives.py
 create mode 100644 tests/cuda/test_kernel_semantic.py
 create mode 100644 tests/pyccel/scripts/kernel/hello_kernel.py
 create mode 100644 tests/pyccel/scripts/kernel/kernel_name_collision.py

diff --git a/.dict_custom.txt b/.dict_custom.txt
index ae99f31ed4..5d99e21194 100644
--- a/.dict_custom.txt
+++ b/.dict_custom.txt
@@ -118,3 +118,4 @@ datatyping
 datatypes
 indexable
 traceback
+GPUs
diff --git a/CHANGELOG.md b/CHANGELOG.md
index b897e14385..717f638bf3 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,8 @@ All notable changes to this project will be documented in this file.
 
 -   #32 : Add support for `nvcc` Compiler and `cuda` language as a possible option.
 -   #48 : Fix incorrect handling of imports in `cuda`.
+-   #42 : Add support for custom kernel in`cuda`.
+-   #42 : Add Cuda module to Pyccel. Add support for `cuda.synchronize` function.
 
 ## \[UNRELEASED\]
 
diff --git a/docs/cuda.md b/docs/cuda.md
new file mode 100644
index 0000000000..de30d52b80
--- /dev/null
+++ b/docs/cuda.md
@@ -0,0 +1,23 @@
+# Getting started GPU
+
+Pyccel now supports NVIDIA CUDA, empowering users to accelerate numerical computations on GPUs seamlessly. With Pyccel's high-level syntax and automatic code generation, harnessing the power of CUDA becomes effortless. This documentation provides a quick guide to enabling CUDA in Pyccel
+
+## Cuda Decorator
+
+### kernel
+
+The kernel decorator allows the user to declare a CUDA kernel. The kernel can be defined in Python, and the syntax is similar to that of Numba.
+
+```python
+from pyccel.decorators import kernel
+
+@kernel
+def my_kernel():
+    pass
+
+blockspergrid = 1
+threadsperblock = 1
+# Call your kernel function
+my_kernel[blockspergrid, threadsperblock]()
+
+```
\ No newline at end of file
diff --git a/pyccel/ast/core.py b/pyccel/ast/core.py
index 013f206dd6..f0e5cc67f1 100644
--- a/pyccel/ast/core.py
+++ b/pyccel/ast/core.py
@@ -73,6 +73,7 @@
     'If',
     'IfSection',
     'Import',
+    'IndexedFunctionCall',
     'InProgram',
     'InlineFunctionDef',
     'Interface',
@@ -2065,6 +2066,42 @@ def _ignore(cls, c):
         """
         return c is None or isinstance(c, (FunctionDef, *cls._ignored_types))
 
+class IndexedFunctionCall(FunctionCall):
+    """
+    Represents an indexed function call in the code.
+
+    Class representing indexed function calls, encapsulating all
+    relevant information for such calls within the code base.
+
+    Parameters
+    ----------
+    func : FunctionDef
+        The function being called.
+
+    args : iterable of FunctionCallArgument
+        The arguments passed to the function.
+
+    indexes : iterable of TypedAstNode
+        The indexes of the function call.
+
+    current_function : FunctionDef, optional
+        The function where the call takes place.
+    """
+    __slots__ = ('_indexes',)
+    _attribute_nodes = FunctionCall._attribute_nodes + ('_indexes',)
+    def __init__(self, func, args, indexes, current_function = None):
+        self._indexes = indexes
+        super().__init__(func, args, current_function)
+
+    @property
+    def indexes(self):
+        """
+        Indexes of function call.
+
+        Represents the indexes of the function call
+        """
+        return self._indexes
+
 class ConstructorCall(FunctionCall):
 
     """
diff --git a/pyccel/ast/cuda.py b/pyccel/ast/cuda.py
new file mode 100644
index 0000000000..f1e50ef7f0
--- /dev/null
+++ b/pyccel/ast/cuda.py
@@ -0,0 +1,65 @@
+# -*- coding: utf-8 -*-
+#------------------------------------------------------------------------------------------#
+# This file is part of Pyccel which is released under MIT License. See the LICENSE file or #
+# go to https://github.com/pyccel/pyccel/blob/master/LICENSE for full license details.     #
+#------------------------------------------------------------------------------------------#
+"""
+CUDA Module
+This module provides a collection of classes and utilities for CUDA programming.
+"""
+from pyccel.ast.core import FunctionCall
+
+__all__ = (
+    'KernelCall',
+)
+
+class KernelCall(FunctionCall):
+    """
+    Represents a kernel function call in the code.
+
+    The class serves as a representation of a kernel
+    function call within the codebase.
+
+    Parameters
+    ----------
+    func : FunctionDef
+        The definition of the function being called.
+
+    args : iterable of FunctionCallArgument
+        The arguments passed to the function.
+
+    num_blocks : TypedAstNode
+        The number of blocks. These objects must have a primitive type of `PrimitiveIntegerType`.
+
+    tp_block : TypedAstNode
+        The number of threads per block. These objects must have a primitive type of `PrimitiveIntegerType`.
+
+    current_function : FunctionDef, optional
+        The function where the call takes place.
+    """
+    __slots__ = ('_num_blocks','_tp_block')
+    _attribute_nodes = (*FunctionCall._attribute_nodes, '_num_blocks', '_tp_block')
+
+    def __init__(self, func, args, num_blocks, tp_block, current_function = None):
+        self._num_blocks = num_blocks
+        self._tp_block = tp_block
+        super().__init__(func, args, current_function)
+
+    @property
+    def num_blocks(self):
+        """
+        The number of blocks in the kernel being called.
+
+        The number of blocks in the kernel being called.
+        """
+        return self._num_blocks
+
+    @property
+    def tp_block(self):
+        """
+        The number of threads per block.
+
+        The number of threads per block.
+        """
+        return self._tp_block
+
diff --git a/pyccel/ast/cudaext.py b/pyccel/ast/cudaext.py
new file mode 100644
index 0000000000..b540f20993
--- /dev/null
+++ b/pyccel/ast/cudaext.py
@@ -0,0 +1,42 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+#------------------------------------------------------------------------------------------#
+# This file is part of Pyccel which is released under MIT License. See the LICENSE file or #
+# go to https://github.com/pyccel/pyccel/blob/master/LICENSE for full license details.     #
+#------------------------------------------------------------------------------------------#
+"""
+CUDA Extension Module
+Provides CUDA functionality for code generation.
+"""
+from .internals      import PyccelFunction
+
+from .datatypes      import VoidType
+from .core           import Module, PyccelFunctionDef
+
+__all__ = (
+    'CudaSynchronize',
+)
+
+class CudaSynchronize(PyccelFunction):
+    """
+    Represents a call to Cuda.synchronize for code generation.
+
+    This class serves as a representation of the Cuda.synchronize method.
+    """
+    __slots__ = ()
+    _attribute_nodes = ()
+    _shape     = None
+    _class_type = VoidType()
+    def __init__(self):
+        super().__init__()
+
+cuda_funcs = {
+    'synchronize'       : PyccelFunctionDef('synchronize' , CudaSynchronize),
+}
+
+cuda_mod = Module('cuda',
+    variables=[],
+    funcs=cuda_funcs.values(),
+    imports=[]
+)
+
diff --git a/pyccel/ast/utilities.py b/pyccel/ast/utilities.py
index 1e6c0422ab..e5cd77b168 100644
--- a/pyccel/ast/utilities.py
+++ b/pyccel/ast/utilities.py
@@ -25,6 +25,7 @@
 from .literals      import LiteralInteger, LiteralEllipsis, Nil
 from .mathext       import math_mod
 from .sysext        import sys_mod
+from .cudaext       import cuda_mod
 
 from .numpyext      import (NumpyEmpty, NumpyArray, numpy_mod,
                             NumpyTranspose, NumpyLinspace)
@@ -49,7 +50,8 @@
 decorators_mod = Module('decorators',(),
         funcs = [PyccelFunctionDef(d, PyccelFunction) for d in pyccel_decorators.__all__])
 pyccel_mod = Module('pyccel',(),(),
-        imports = [Import('decorators', decorators_mod)])
+        imports = [Import('decorators', decorators_mod),
+                    Import('cuda', cuda_mod)])
 
 # TODO add documentation
 builtin_import_registry = Module('__main__',
diff --git a/pyccel/codegen/printing/cucode.py b/pyccel/codegen/printing/cucode.py
index 277d2a3a6a..cd26843017 100644
--- a/pyccel/codegen/printing/cucode.py
+++ b/pyccel/codegen/printing/cucode.py
@@ -9,11 +9,12 @@
 enabling the direct translation of high-level Pyccel expressions into CUDA code.
 """
 
-from pyccel.codegen.printing.ccode import CCodePrinter, c_library_headers
+from pyccel.codegen.printing.ccode  import CCodePrinter
 
-from pyccel.ast.core        import Import, Module
+from pyccel.ast.core                import Import, Module
+from pyccel.ast.literals            import Nil
 
-from pyccel.errors.errors   import Errors
+from pyccel.errors.errors           import Errors
 
 
 errors = Errors()
@@ -61,6 +62,44 @@ def _print_Module(self, expr):
         self.exit_scope()
         return code
 
+    def function_signature(self, expr, print_arg_names = True):
+        """
+        Get the Cuda representation of the function signature.
+
+        Extract from the function definition `expr` all the
+        information (name, input, output) needed to create the
+        function signature and return a string describing the
+        function.
+        This is not a declaration as the signature does not end
+        with a semi-colon.
+
+        Parameters
+        ----------
+        expr : FunctionDef
+            The function definition for which a signature is needed.
+
+        print_arg_names : bool, default : True
+            Indicates whether argument names should be printed.
+
+        Returns
+        -------
+        str
+            Signature of the function.
+        """
+        cuda_decorater = '__global__' if 'kernel' in expr.decorators else ''
+        c_function_signature = super().function_signature(expr, print_arg_names)
+        return f'{cuda_decorater} {c_function_signature}'
+
+    def _print_KernelCall(self, expr):
+        func = expr.funcdef
+        args = [a.value or Nil() for a in expr.args]
+
+        args = ', '.join(self._print(a) for a in args)
+        return f"{func.name}<<<{expr.num_blocks}, {expr.tp_block}>>>({args});\n"
+
+    def _print_CudaSynchronize(self, expr):
+        return 'cudaDeviceSynchronize();\n'
+
     def _print_ModuleHeader(self, expr):
         self.set_scope(expr.module.scope)
         self._in_header = True
@@ -87,6 +126,7 @@ def _print_ModuleHeader(self, expr):
                     }}\n'
         return '\n'.join((f"#ifndef {name.upper()}_H",
                           f"#define {name.upper()}_H",
+                          imports,
                           global_variables,
                           function_declaration,
                           "#endif // {name.upper()}_H\n"))
diff --git a/pyccel/cuda/__init__.py b/pyccel/cuda/__init__.py
new file mode 100644
index 0000000000..e8542ad5d5
--- /dev/null
+++ b/pyccel/cuda/__init__.py
@@ -0,0 +1,10 @@
+#------------------------------------------------------------------------------------------#
+# This file is part of Pyccel which is released under MIT License. See the LICENSE file or #
+# go to https://github.com/pyccel/pyccel/blob/master/LICENSE for full license details.     #
+#------------------------------------------------------------------------------------------#
+"""
+    This module is for exposing the CudaSubmodule functions.
+"""
+from .cuda_sync_primitives    import synchronize
+
+__all__ = ['synchronize']
diff --git a/pyccel/cuda/cuda_sync_primitives.py b/pyccel/cuda/cuda_sync_primitives.py
new file mode 100644
index 0000000000..f3442fe9e2
--- /dev/null
+++ b/pyccel/cuda/cuda_sync_primitives.py
@@ -0,0 +1,16 @@
+#------------------------------------------------------------------------------------------#
+# This file is part of Pyccel which is released under MIT License. See the LICENSE file or #
+# go to https://github.com/pyccel/pyccel/blob/master/LICENSE for full license details.     #
+#------------------------------------------------------------------------------------------#
+"""
+This submodule contains CUDA methods for Pyccel.
+"""
+
+
+def synchronize():
+    """
+    Synchronize CUDA device execution.
+
+    Synchronize CUDA device execution.
+    """
+
diff --git a/pyccel/decorators.py b/pyccel/decorators.py
index 1f640043db..77717a991f 100644
--- a/pyccel/decorators.py
+++ b/pyccel/decorators.py
@@ -19,6 +19,7 @@
     'sympy',
     'template',
     'types',
+    'kernel'
 )
 
 
@@ -109,3 +110,34 @@ def allow_negative_index(f,*args):
     def identity(f):
         return f
     return identity
+
+def kernel(f):
+    """
+    Decorator for marking a Python function as a kernel.
+
+    This class serves as a decorator to mark a Python function
+    as a kernel function, typically used for GPU computations.
+    This allows the function to be indexed with the number of blocks and threads.
+
+    Parameters
+    ----------
+    f : function
+        The function to which the decorator is applied.
+
+    Returns
+    -------
+    KernelAccessor
+        A class representing the kernel function.
+    """
+    class KernelAccessor:
+        """
+        Class representing the kernel function.
+
+        Class representing the kernel function.
+        """
+        def __init__(self, f):
+            self._f = f
+        def __getitem__(self, args):
+            return self._f
+
+    return KernelAccessor(f)
diff --git a/pyccel/errors/messages.py b/pyccel/errors/messages.py
index 79eccc1df2..09966d810c 100644
--- a/pyccel/errors/messages.py
+++ b/pyccel/errors/messages.py
@@ -162,3 +162,11 @@
 WRONG_LINSPACE_ENDPOINT = 'endpoint argument must be boolean'
 NON_LITERAL_KEEP_DIMS = 'keep_dims argument must be a literal, otherwise rank is unknown'
 NON_LITERAL_AXIS = 'axis argument must be a literal, otherwise pyccel cannot determine which dimension to operate on'
+MISSING_KERNEL_CONFIGURATION = 'Kernel launch configuration not specified'
+INVALID_KERNEL_LAUNCH_CONFIG = 'Expected exactly 2 parameters for kernel launch'
+INVALID_KERNEL_CALL_BP_GRID = 'Invalid Block per grid parameter for Kernel call'
+INVALID_KERNEL_CALL_TP_BLOCK = 'Invalid Thread per Block parameter for Kernel call'
+
+
+
+
diff --git a/pyccel/parser/semantic.py b/pyccel/parser/semantic.py
index e94b9c8413..fde10d6317 100644
--- a/pyccel/parser/semantic.py
+++ b/pyccel/parser/semantic.py
@@ -116,6 +116,8 @@
 from pyccel.ast.variable import IndexedElement, AnnotatedPyccelSymbol
 from pyccel.ast.variable import DottedName, DottedVariable
 
+from pyccel.ast.cuda import     KernelCall
+
 from pyccel.errors.errors import Errors
 from pyccel.errors.errors import PyccelSemanticError
 
@@ -133,7 +135,9 @@
         PYCCEL_RESTRICTION_LIST_COMPREHENSION_LIMITS, PYCCEL_RESTRICTION_LIST_COMPREHENSION_SIZE,
         UNUSED_DECORATORS, UNSUPPORTED_POINTER_RETURN_VALUE, PYCCEL_RESTRICTION_OPTIONAL_NONE,
         PYCCEL_RESTRICTION_PRIMITIVE_IMMUTABLE, PYCCEL_RESTRICTION_IS_ISNOT,
-        FOUND_DUPLICATED_IMPORT, UNDEFINED_WITH_ACCESS, MACRO_MISSING_HEADER_OR_FUNC)
+        FOUND_DUPLICATED_IMPORT, UNDEFINED_WITH_ACCESS, MACRO_MISSING_HEADER_OR_FUNC, PYCCEL_RESTRICTION_INHOMOG_SET,
+        MISSING_KERNEL_CONFIGURATION,
+        INVALID_KERNEL_LAUNCH_CONFIG, INVALID_KERNEL_CALL_BP_GRID, INVALID_KERNEL_CALL_TP_BLOCK)
 
 from pyccel.parser.base      import BasicParser
 from pyccel.parser.syntactic import SyntaxParser
@@ -1139,6 +1143,67 @@ def _handle_function(self, expr, func, args, *, is_method = False, use_build_fun
 
             return new_expr
 
+    def _handle_kernel(self, expr, func, args):
+        """
+        Create the node representing the kernel function call.
+
+        Create a FunctionCall or an instance of a PyccelInternalFunction
+        from the function information and arguments.
+
+        Parameters
+        ----------
+        expr : IndexedFunctionCall
+               Node has all the information about the function call.
+
+        func : FunctionDef | Interface | PyccelInternalFunction type
+               The function being called.
+
+        args : iterable of FunctionCallArgument
+               The arguments passed to the function.
+
+        Returns
+        -------
+        Pyccel.ast.cuda.KernelCall
+            The semantic representation of the kernel call.
+        """
+        if len(expr.indexes) != 2:
+            errors.report(INVALID_KERNEL_LAUNCH_CONFIG,
+                    symbol=expr,
+                    severity='fatal')
+        if len(func.results):
+            errors.report(f"cuda kernel function '{func.name}' returned a value in violation of the laid-down specification",
+                         symbol=expr,
+                         severity='fatal')
+        if isinstance(func, FunctionDef) and len(args) != len(func.arguments):
+            errors.report(f"{len(args)} argument types given, but function takes {len(func.arguments)} arguments",
+                symbol=expr,
+                severity='fatal')
+        if not isinstance(expr.indexes[0], (LiteralInteger)):
+            if isinstance(expr.indexes[0], PyccelSymbol):
+                num_blocks = self.get_variable(expr.indexes[0])
+
+                if not isinstance(num_blocks.dtype, PythonNativeInt):
+                    errors.report(INVALID_KERNEL_CALL_BP_GRID,
+                    symbol = expr,
+                    severity='fatal')
+            else:
+                errors.report(INVALID_KERNEL_CALL_BP_GRID,
+                    symbol = expr,
+                    severity='fatal')
+        if not isinstance(expr.indexes[1], (LiteralInteger)):
+            if isinstance(expr.indexes[1], PyccelSymbol):
+                tp_block = self.get_variable(expr.indexes[1])
+                if not isinstance(tp_block.dtype, PythonNativeInt):
+                    errors.report(INVALID_KERNEL_CALL_TP_BLOCK,
+                    symbol = expr,
+                    severity='fatal')
+            else:
+                errors.report(INVALID_KERNEL_CALL_TP_BLOCK,
+                    symbol = expr,
+                    severity='fatal')
+        new_expr = KernelCall(func, args, expr.indexes[0], expr.indexes[1])
+        return new_expr
+
     def _sort_function_call_args(self, func_args, args):
         """
         Sort and add the missing call arguments to match the arguments in the function definition.
@@ -2815,6 +2880,23 @@ def _visit_Lambda(self, expr):
                 expr = Lambda(tuple(expr.variables), expr_new)
         return expr
 
+    def _visit_IndexedFunctionCall(self, expr):
+        name     = expr.funcdef
+        name = self.scope.get_expected_name(name)
+        func     = self.scope.find(name, 'functions')
+        args = self._handle_function_args(expr.args)
+
+        if func is None:
+            return errors.report(UNDEFINED_FUNCTION, symbol=expr.funcdef,
+                    bounding_box=(self.current_ast_node.lineno, self.current_ast_node.col_offset),
+                    severity='fatal')
+
+        func = self._annotate_the_called_function_def(func)
+        if 'kernel' in func.decorators :
+            return self._handle_kernel(expr, func, args)
+        else:
+            return errors.report("Unknown function type",
+                symbol=expr, severity='fatal')
     def _visit_FunctionCall(self, expr):
         name     = expr.funcdef
         try:
diff --git a/pyccel/parser/syntactic.py b/pyccel/parser/syntactic.py
index 2967f4999b..3af7f0728a 100644
--- a/pyccel/parser/syntactic.py
+++ b/pyccel/parser/syntactic.py
@@ -64,6 +64,8 @@
 
 from pyccel.ast.type_annotations import SyntacticTypeAnnotation, UnionTypeAnnotation
 
+from pyccel.ast.core import IndexedFunctionCall
+
 from pyccel.parser.base        import BasicParser
 from pyccel.parser.extend_tree import extend_tree
 from pyccel.parser.utilities   import get_default_path
@@ -1102,6 +1104,8 @@ def _visit_Call(self, stmt):
         elif isinstance(func, DottedName):
             func_attr = FunctionCall(func.name[-1], args)
             func = DottedName(*func.name[:-1], func_attr)
+        elif isinstance(func,IndexedElement):
+            func = IndexedFunctionCall(func.base, args, func.indices)
         else:
             raise NotImplementedError(f' Unknown function type {type(func)}')
 
diff --git a/tests/conftest.py b/tests/conftest.py
index a5082ef6e8..4e74d1ec7a 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -59,6 +59,15 @@ def pytest_runtest_teardown(item, nextitem):
 
 def pytest_addoption(parser):
     parser.addoption("--developer-mode", action="store_true", default=github_debugging, help="Show tracebacks when pyccel errors are raised")
+    parser.addoption("--gpu_available", action="store_true",
+                default=False, help="enable GPU tests")
+
+def pytest_generate_tests(metafunc):
+    if "gpu_available" in metafunc.fixturenames:
+        if metafunc.config.getoption("gpu_available"):
+            metafunc.parametrize("gpu_available", [True])
+        else:
+            metafunc.parametrize("gpu_available", [False])
 
 def pytest_sessionstart(session):
     # setup_stuff
diff --git a/tests/cuda/test_kernel_semantic.py b/tests/cuda/test_kernel_semantic.py
new file mode 100644
index 0000000000..00b74c3bea
--- /dev/null
+++ b/tests/cuda/test_kernel_semantic.py
@@ -0,0 +1,176 @@
+# pylint: disable=missing-function-docstring, missing-module-docstring
+import pytest
+
+from pyccel import epyccel
+from pyccel.decorators import kernel
+from pyccel.errors.errors import Errors, PyccelSemanticError
+from pyccel.errors.messages import (INVALID_KERNEL_CALL_TP_BLOCK,
+                                    INVALID_KERNEL_CALL_BP_GRID,
+                                    INVALID_KERNEL_LAUNCH_CONFIG)
+
+
+@pytest.mark.cuda
+def test_invalid_block_number():
+    def invalid_block_number():
+        @kernel
+        def kernel_call():
+            pass
+
+        blocks_per_grid = 1.0
+        threads_per_block = 1
+        kernel_call[blocks_per_grid, threads_per_block]()
+
+    errors = Errors()
+
+    with pytest.raises(PyccelSemanticError):
+        epyccel(invalid_block_number, language="cuda")
+
+    assert errors.has_errors()
+
+    assert errors.num_messages() == 1
+
+    error_info = [*errors.error_info_map.values()][0][0]
+    assert error_info.symbol.funcdef == 'kernel_call'
+    assert INVALID_KERNEL_CALL_BP_GRID == error_info.message
+
+
+@pytest.mark.cuda
+def test_invalid_thread_per_block():
+    def invalid_thread_per_block():
+        @kernel
+        def kernel_call():
+            pass
+
+        blocks_per_grid = 1
+        threads_per_block = 1.0
+        kernel_call[blocks_per_grid, threads_per_block]()
+
+    errors = Errors()
+
+    with pytest.raises(PyccelSemanticError):
+        epyccel(invalid_thread_per_block, language="cuda")
+    assert errors.has_errors()
+    assert errors.num_messages() == 1
+    error_info = [*errors.error_info_map.values()][0][0]
+    assert error_info.symbol.funcdef == 'kernel_call'
+    assert INVALID_KERNEL_CALL_TP_BLOCK == error_info.message
+
+
+@pytest.mark.cuda
+def test_invalid_launch_config_high():
+    def invalid_launch_config_high():
+        @kernel
+        def kernel_call():
+            pass
+
+        blocks_per_grid = 1
+        threads_per_block = 1
+        third_param = 1
+        kernel_call[blocks_per_grid, threads_per_block, third_param]()
+
+    errors = Errors()
+
+    with pytest.raises(PyccelSemanticError):
+        epyccel(invalid_launch_config_high, language="cuda")
+
+    assert errors.has_errors()
+    assert errors.num_messages() == 1
+
+    error_info = [*errors.error_info_map.values()][0][0]
+    assert error_info.symbol.funcdef == 'kernel_call'
+    assert INVALID_KERNEL_LAUNCH_CONFIG == error_info.message
+
+
+@pytest.mark.cuda
+def test_invalid_launch_config_low():
+    def invalid_launch_config_low():
+        @kernel
+        def kernel_call():
+            pass
+
+        blocks_per_grid = 1
+        kernel_call[blocks_per_grid]()
+
+    errors = Errors()
+
+    with pytest.raises(PyccelSemanticError):
+        epyccel(invalid_launch_config_low, language="cuda")
+
+    assert errors.has_errors()
+    assert errors.num_messages() == 1
+
+    error_info = [*errors.error_info_map.values()][0][0]
+    assert error_info.symbol.funcdef == 'kernel_call'
+    assert INVALID_KERNEL_LAUNCH_CONFIG == error_info.message
+
+
+@pytest.mark.cuda
+def test_invalid_arguments_for_kernel_call():
+    def invalid_arguments():
+        @kernel
+        def kernel_call(arg : int):
+            pass
+
+        blocks_per_grid = 1
+        threads_per_block = 1
+        kernel_call[blocks_per_grid, threads_per_block]()
+
+    errors = Errors()
+
+    with pytest.raises(PyccelSemanticError):
+        epyccel(invalid_arguments, language="cuda")
+
+    assert errors.has_errors()
+    assert errors.num_messages() == 1
+
+    error_info = [*errors.error_info_map.values()][0][0]
+    assert error_info.symbol.funcdef == 'kernel_call'
+    assert "0 argument types given, but function takes 1 arguments" == error_info.message
+
+
+@pytest.mark.cuda
+def test_invalid_arguments_for_kernel_call_2():
+    def invalid_arguments_():
+        @kernel
+        def kernel_call():
+            pass
+
+        blocks_per_grid = 1
+        threads_per_block = 1
+        kernel_call[blocks_per_grid, threads_per_block](1)
+
+    errors = Errors()
+
+    with pytest.raises(PyccelSemanticError):
+        epyccel(invalid_arguments_, language="cuda")
+
+    assert errors.has_errors()
+    assert errors.num_messages() == 1
+
+    error_info = [*errors.error_info_map.values()][0][0]
+    assert error_info.symbol.funcdef == 'kernel_call'
+    assert "1 argument types given, but function takes 0 arguments" == error_info.message
+
+
+@pytest.mark.cuda
+def test_kernel_return():
+    def kernel_return():
+        @kernel
+        def kernel_call():
+            return 7
+
+        blocks_per_grid = 1
+        threads_per_block = 1
+        kernel_call[blocks_per_grid, threads_per_block](1)
+
+    errors = Errors()
+
+    with pytest.raises(PyccelSemanticError):
+        epyccel(kernel_return, language="cuda")
+
+    assert errors.has_errors()
+    assert errors.num_messages() == 1
+
+    error_info = [*errors.error_info_map.values()][0][0]
+    assert error_info.symbol.funcdef == 'kernel_call'
+    assert "cuda kernel function 'kernel_call' returned a value in violation of the laid-down specification" == error_info.message
diff --git a/tests/pyccel/scripts/kernel/hello_kernel.py b/tests/pyccel/scripts/kernel/hello_kernel.py
new file mode 100644
index 0000000000..b6901b25a1
--- /dev/null
+++ b/tests/pyccel/scripts/kernel/hello_kernel.py
@@ -0,0 +1,19 @@
+# pylint: disable=missing-function-docstring, missing-module-docstring
+from pyccel.decorators import kernel
+from pyccel import cuda
+
+@kernel
+def say_hello(its_morning : bool):
+    if(its_morning):
+        print("Hello and Good morning")
+    else:
+        print("Hello and Good afternoon")
+
+def f():
+    its_morning = True
+    say_hello[1,1](its_morning)
+    cuda.synchronize()
+
+if __name__ == '__main__':
+    f()
+
diff --git a/tests/pyccel/scripts/kernel/kernel_name_collision.py b/tests/pyccel/scripts/kernel/kernel_name_collision.py
new file mode 100644
index 0000000000..ac7abe25ae
--- /dev/null
+++ b/tests/pyccel/scripts/kernel/kernel_name_collision.py
@@ -0,0 +1,8 @@
+# pylint: disable=missing-function-docstring, missing-module-docstring
+from pyccel.decorators import kernel
+
+@kernel
+def do():
+    pass
+
+do[1,1]()
diff --git a/tests/pyccel/test_pyccel.py b/tests/pyccel/test_pyccel.py
index ec1e846549..b4757a3c31 100644
--- a/tests/pyccel/test_pyccel.py
+++ b/tests/pyccel/test_pyccel.py
@@ -294,7 +294,7 @@ def compare_pyth_fort_output( p_output, f_output, dtype=float, language=None):
 #------------------------------------------------------------------------------
 def pyccel_test(test_file, dependencies = None, compile_with_pyccel = True,
         cwd = None, pyccel_commands = "", output_dtype = float,
-        language = None, output_dir = None):
+        language = None, output_dir = None, execute_code = True):
     """
     Run pyccel and compare the output to ensure that the results
     are equivalent
@@ -394,13 +394,14 @@ def pyccel_test(test_file, dependencies = None, compile_with_pyccel = True,
             compile_fortran(cwd, output_test_file, dependencies)
         elif language == 'c':
             compile_c(cwd, output_test_file, dependencies)
-
-    lang_out = get_lang_output(output_test_file, language)
-    compare_pyth_fort_output(pyth_out, lang_out, output_dtype, language)
+    if  execute_code:
+        lang_out = get_lang_output(output_test_file, language)
+        compare_pyth_fort_output(pyth_out, lang_out, output_dtype, language)
 
 #==============================================================================
 # UNIT TESTS
 #==============================================================================
+
 def test_relative_imports_in_project(language):
 
     base_dir = os.path.dirname(os.path.realpath(__file__))
@@ -728,6 +729,19 @@ def test_multiple_results(language):
 def test_elemental(language):
     pyccel_test("scripts/decorators_elemental.py", language = language)
 
+#------------------------------------------------------------------------------
+@pytest.mark.cuda
+def test_hello_kernel(gpu_available):
+    types = str
+    pyccel_test("scripts/kernel/hello_kernel.py",
+            language="cuda", output_dtype=types , execute_code=gpu_available)
+
+#------------------------------------------------------------------------------
+@pytest.mark.cuda
+def test_kernel_collision(gpu_available):
+    pyccel_test("scripts/kernel/kernel_name_collision.py",
+            language="cuda", execute_code=gpu_available)
+
 #------------------------------------------------------------------------------
 def test_print_strings(language):
     types = str

From ca701d93be9db239b75084e1c5ddd2e0b28e2ab5 Mon Sep 17 00:00:00 2001
From: Said Mazouz <95222894+smazouz42@users.noreply.github.com>
Date: Wed, 3 Jul 2024 17:37:02 +0100
Subject: [PATCH 028/130] Updated CUDA Name Clash Checker By Adding
 CUDA-specific keywords (#60)

This pull request addresses issue #59 by adding more CUDA-specific
keywords to the name clash checker, improving the checking of
variable/function names and preventing name clashes.

---------

Co-authored-by: EmilyBourne <louise.bourne@gmail.com>
Co-authored-by: bauom <40796259+bauom@users.noreply.github.com>
---
 CHANGELOG.md                              |  1 +
 pyccel/naming/cudanameclashchecker.py     | 36 ++++++++++++++++++++++-
 pyccel/naming/languagenameclashchecker.py |  5 ++++
 3 files changed, 41 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 717f638bf3..afdabc3ab7 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,7 @@ All notable changes to this project will be documented in this file.
 
 -   #32 : Add support for `nvcc` Compiler and `cuda` language as a possible option.
 -   #48 : Fix incorrect handling of imports in `cuda`.
+-   #59 : Updated `cuda` clash checker.
 -   #42 : Add support for custom kernel in`cuda`.
 -   #42 : Add Cuda module to Pyccel. Add support for `cuda.synchronize` function.
 
diff --git a/pyccel/naming/cudanameclashchecker.py b/pyccel/naming/cudanameclashchecker.py
index 971204e912..c7aaa4952f 100644
--- a/pyccel/naming/cudanameclashchecker.py
+++ b/pyccel/naming/cudanameclashchecker.py
@@ -16,6 +16,7 @@ class CudaNameClashChecker(LanguageNameClashChecker):
     verify that they do not cause name clashes. Name clashes may be due to
     new variables, or due to the use of reserved keywords.
     """
+
     # Keywords as mentioned on https://en.cppreference.com/w/c/keyword
     keywords = set(['isign', 'fsign', 'csign', 'auto', 'break', 'case', 'char', 'const',
         'continue', 'default', 'do', 'double', 'else', 'enum',
@@ -37,7 +38,40 @@ class CudaNameClashChecker(LanguageNameClashChecker):
         'GET_INDEX_FUNC_H2', 'GET_INDEX_FUNC', 'GET_INDEX',
         'INDEX', 'GET_ELEMENT', 'free_array', 'free_pointer',
         'get_index', 'numpy_to_ndarray_strides',
-        'numpy_to_ndarray_shape', 'get_size', 'order_f', 'order_c', 'array_copy_data'])
+        'numpy_to_ndarray_shape', 'get_size', 'order_f', 'order_c', 'array_copy_data'
+        '__global__', '__device__', '__host__','__constant__', '__shared__',
+        '__managed__','threadIdx', 'blockIdx', 'blockDim', 'gridDim',
+        'warpSize', 'cudaMalloc', 'cudaFree', 'cudaMemcpy', 'cudaMemset',
+        'cudaMallocHost', 'cudaFreeHost', 'cudaMallocPitch',
+        'cudaMallocArray', 'cudaFreeArray', 'cudaHostAlloc',
+        'cudaHostRegister', 'cudaHostUnregister', 'cudaHostGetDevicePointer',
+        'cudaHostGetFlags', 'cudaDeviceSynchronize', 'cudaDeviceReset',
+        'cudaSetDevice', 'cudaGetDeviceCount', 'cudaGetDeviceProperties',
+        'cudaChooseDevice', 'cudaSetDeviceFlags', 'cudaGetDevice',
+        'cudaStreamCreate', 'cudaStreamDestroy', 'cudaStreamSynchronize',
+        'cudaStreamWaitEvent', 'cudaEventCreate', 'cudaEventDestroy', 'cudaEventRecord',
+        'cudaEventSynchronize', 'cudaEventElapsedTime', 'cuInit', 'cuDeviceGet',
+        'cuDeviceGetCount', 'cuDeviceGetName',
+        'cuDeviceComputeCapability', 'cuCtxCreate', 'cuCtxDestroy',
+        'cuCtxSynchronize', 'cuModuleLoad', 'cuModuleUnload',
+        'cuModuleGetFunction', 'cuModuleGetGlobal', 'cuModuleGetTexRef',
+        'cuMemAlloc', 'cuMemFree', 'cuMemcpyHtoD', 'cuMemcpyDtoH',
+        'cuMemcpyDtoD', 'cuMemcpyHtoDAsync', 'cuMemcpyDtoHAsync',
+        'cuMemcpyDtoDAsync', 'cuMemsetD8', 'cuMemsetD16', 'cuMemsetD32',
+        'cuMemsetD2D8', 'cuMemsetD2D16', 'cuMemsetD2D32', 'cuParamSetSize',
+        'cuParamSeti', 'cuParamSetf', 'cuParamSetv', 'cuLaunch', 'cuLaunchGrid',
+        'cuLaunchGridAsync', 'cuEventCreate', 'cuEventRecord', 'cuEventQuery',
+        'cuEventSynchronize', 'cuEventDestroy', 'cuEventElapsedTime',
+        'cuStreamCreate', 'cuStreamQuery', 'cuStreamSynchronize',
+        'cuStreamDestroy', 'cuFuncSetBlockShape', 'cuFuncSetSharedSize',
+        'cuFuncGetAttribute', 'cuTexRefCreate', 'cuTexRefDestroy',
+        'cuTexRefSetArray', 'cuTexRefSetAddress', 'cuTexRefSetAddress2D',
+        'cuTexRefSetFormat', 'cuTexRefSetAddressMode', 'cuTexRefSetFilterMode',
+        'cuTexRefSetFlags', 'cuTexRefGetAddress', 'cuTexRefGetArray',
+        'cuTexRefGetAddressMode', 'cuTexRefGetFilterMode', 'cuTexRefGetFormat',
+        'cuTexRefGetFlags', 'cuLaunchKernel', 'cuOccupancyMaxActiveBlocksPerMultiprocessor',
+        'cuOccupancyMaxPotentialBlockSize', 'cuOccupancyMaxPotentialBlockSizeWithFlags'
+    ])
 
     def has_clash(self, name, symbols):
         """
diff --git a/pyccel/naming/languagenameclashchecker.py b/pyccel/naming/languagenameclashchecker.py
index fa672a905b..d6415e6449 100644
--- a/pyccel/naming/languagenameclashchecker.py
+++ b/pyccel/naming/languagenameclashchecker.py
@@ -19,6 +19,11 @@ class LanguageNameClashChecker(metaclass = Singleton):
     """
     keywords = None
 
+    def __init__(self): #pylint: disable=useless-parent-delegation
+        # This __init__ function is required so the ArgumentSingleton can
+        # always detect a signature
+        super().__init__()
+
     def _get_collisionless_name(self, name, symbols):
         """
         Get a name which doesn't collision with keywords or symbols.

From 828d16646dd52174e2dda9742f30e45df87e07f2 Mon Sep 17 00:00:00 2001
From: Said Mazouz <95222894+smazouz42@users.noreply.github.com>
Date: Wed, 3 Jul 2024 18:04:22 +0100
Subject: [PATCH 029/130] add handle for custom device (#61)

This pull request addresses issue
https://github.com/pyccel/pyccel-cuda/issues/41 by implementing a new
feature in Pyccel that allows users to define custom device functions.

**Commit Summary**

- Adding handler for custom device and its code generation.
- Adding test

---------

Co-authored-by: EmilyBourne <louise.bourne@gmail.com>
---
 CHANGELOG.md                               |  1 +
 docs/cuda.md                               | 25 ++++++++++++++++-
 pyccel/codegen/printing/cucode.py          |  7 ++---
 pyccel/decorators.py                       | 19 +++++++++++++
 pyccel/errors/messages.py                  |  2 +-
 pyccel/parser/semantic.py                  |  7 ++++-
 tests/cuda/test_device_semantic.py         | 31 ++++++++++++++++++++++
 tests/pyccel/scripts/kernel/device_test.py | 18 +++++++++++++
 tests/pyccel/test_pyccel.py                |  8 ++++++
 9 files changed, 112 insertions(+), 6 deletions(-)
 create mode 100644 tests/cuda/test_device_semantic.py
 create mode 100644 tests/pyccel/scripts/kernel/device_test.py

diff --git a/CHANGELOG.md b/CHANGELOG.md
index afdabc3ab7..d5523ac5d7 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -10,6 +10,7 @@ All notable changes to this project will be documented in this file.
 -   #59 : Updated `cuda` clash checker.
 -   #42 : Add support for custom kernel in`cuda`.
 -   #42 : Add Cuda module to Pyccel. Add support for `cuda.synchronize` function.
+-   #41 : Add support for custom device in`cuda`.
 
 ## \[UNRELEASED\]
 
diff --git a/docs/cuda.md b/docs/cuda.md
index de30d52b80..7643a4ac02 100644
--- a/docs/cuda.md
+++ b/docs/cuda.md
@@ -20,4 +20,27 @@ threadsperblock = 1
 # Call your kernel function
 my_kernel[blockspergrid, threadsperblock]()
 
-```
\ No newline at end of file
+```
+
+### device
+
+Device functions are similar to kernels, but are executed within the context of a kernel. They can be called only from kernels or device functions, and are typically used for operations that are too small to justify launching a separate kernel, or for operations that need to be performed repeatedly within the context of a kernel.
+
+```python
+from pyccel.decorators import device, kernel
+
+@device
+def add(x, y):
+    return x + y
+
+@kernel
+def my_kernel():
+    x = 1
+    y = 2
+    z = add(x, y)
+    print(z)
+
+my_kernel[1, 1]()
+
+```
+
diff --git a/pyccel/codegen/printing/cucode.py b/pyccel/codegen/printing/cucode.py
index cd26843017..7c01d93c47 100644
--- a/pyccel/codegen/printing/cucode.py
+++ b/pyccel/codegen/printing/cucode.py
@@ -86,9 +86,10 @@ def function_signature(self, expr, print_arg_names = True):
         str
             Signature of the function.
         """
-        cuda_decorater = '__global__' if 'kernel' in expr.decorators else ''
+        cuda_decorator = '__global__' if 'kernel' in expr.decorators else \
+        '__device__' if 'device' in expr.decorators else ''
         c_function_signature = super().function_signature(expr, print_arg_names)
-        return f'{cuda_decorater} {c_function_signature}'
+        return f'{cuda_decorator} {c_function_signature}'
 
     def _print_KernelCall(self, expr):
         func = expr.funcdef
@@ -109,7 +110,7 @@ def _print_ModuleHeader(self, expr):
         cuda_headers = ""
         for f in expr.module.funcs:
             if not f.is_inline:
-                if 'kernel' in f.decorators:  # Checking for 'kernel' decorator
+                if 'kernel' in f.decorators or 'device' in f.decorators:
                     cuda_headers += self.function_signature(f) + ';\n'
                 else:
                     funcs += self.function_signature(f) + ';\n'
diff --git a/pyccel/decorators.py b/pyccel/decorators.py
index 77717a991f..ff413fe443 100644
--- a/pyccel/decorators.py
+++ b/pyccel/decorators.py
@@ -11,6 +11,7 @@
 __all__ = (
     'allow_negative_index',
     'bypass',
+    'device',
     'elemental',
     'inline',
     'private',
@@ -141,3 +142,21 @@ def __getitem__(self, args):
             return self._f
 
     return KernelAccessor(f)
+
+def device(f):
+    """
+    Decorator for marking a function as a GPU device function.
+
+    This decorator is used to mark a Python function as a GPU device function.
+
+    Parameters
+    ----------
+    f : Function
+        The function to be marked as a device.
+
+    Returns
+    -------
+    f
+        The function marked as a device.
+    """
+    return f
diff --git a/pyccel/errors/messages.py b/pyccel/errors/messages.py
index 09966d810c..5fe622c29b 100644
--- a/pyccel/errors/messages.py
+++ b/pyccel/errors/messages.py
@@ -166,7 +166,7 @@
 INVALID_KERNEL_LAUNCH_CONFIG = 'Expected exactly 2 parameters for kernel launch'
 INVALID_KERNEL_CALL_BP_GRID = 'Invalid Block per grid parameter for Kernel call'
 INVALID_KERNEL_CALL_TP_BLOCK = 'Invalid Thread per Block parameter for Kernel call'
-
+INVAlID_DEVICE_CALL = 'A function decorated with "device" should be called only from a "kernel" or another "device" function.'
 
 
 
diff --git a/pyccel/parser/semantic.py b/pyccel/parser/semantic.py
index fde10d6317..7e8dd11bb4 100644
--- a/pyccel/parser/semantic.py
+++ b/pyccel/parser/semantic.py
@@ -136,9 +136,10 @@
         UNUSED_DECORATORS, UNSUPPORTED_POINTER_RETURN_VALUE, PYCCEL_RESTRICTION_OPTIONAL_NONE,
         PYCCEL_RESTRICTION_PRIMITIVE_IMMUTABLE, PYCCEL_RESTRICTION_IS_ISNOT,
         FOUND_DUPLICATED_IMPORT, UNDEFINED_WITH_ACCESS, MACRO_MISSING_HEADER_OR_FUNC, PYCCEL_RESTRICTION_INHOMOG_SET,
-        MISSING_KERNEL_CONFIGURATION,
+        MISSING_KERNEL_CONFIGURATION, INVAlID_DEVICE_CALL,
         INVALID_KERNEL_LAUNCH_CONFIG, INVALID_KERNEL_CALL_BP_GRID, INVALID_KERNEL_CALL_TP_BLOCK)
 
+
 from pyccel.parser.base      import BasicParser
 from pyccel.parser.syntactic import SyntaxParser
 
@@ -1061,6 +1062,10 @@ def _handle_function(self, expr, func, args, *, is_method = False, use_build_fun
         FunctionCall/PyccelFunction
             The semantic representation of the call.
         """
+
+        if isinstance(func, FunctionDef) and 'device' in func.decorators:
+            if 'kernel' not in self.scope.decorators and 'device' not in self.scope.decorators:
+                errors.report(INVAlID_DEVICE_CALL,symbol=expr, severity='fatal')
         if isinstance(func, PyccelFunctionDef):
             if use_build_functions:
                 annotation_method = '_build_' + func.cls_name.__name__
diff --git a/tests/cuda/test_device_semantic.py b/tests/cuda/test_device_semantic.py
new file mode 100644
index 0000000000..5723991961
--- /dev/null
+++ b/tests/cuda/test_device_semantic.py
@@ -0,0 +1,31 @@
+# pylint: disable=missing-function-docstring, missing-module-docstring
+import pytest
+
+from pyccel import epyccel
+from pyccel.decorators import device
+from pyccel.errors.errors import Errors, PyccelSemanticError
+from pyccel.errors.messages import (INVAlID_DEVICE_CALL,)
+
+
+@pytest.mark.cuda
+def test_invalid_device_call():
+    def invalid_device_call():
+        @device
+        def device_call():
+            pass
+        def fake_kernel_call():
+            device_call()
+
+        fake_kernel_call()
+
+    errors = Errors()
+
+    with pytest.raises(PyccelSemanticError):
+        epyccel(invalid_device_call, language="cuda")
+
+    assert errors.has_errors()
+
+    assert errors.num_messages() == 1
+
+    error_info = [*errors.error_info_map.values()][0][0]
+    assert INVAlID_DEVICE_CALL == error_info.message
diff --git a/tests/pyccel/scripts/kernel/device_test.py b/tests/pyccel/scripts/kernel/device_test.py
new file mode 100644
index 0000000000..a4762a6242
--- /dev/null
+++ b/tests/pyccel/scripts/kernel/device_test.py
@@ -0,0 +1,18 @@
+# pylint: disable=missing-function-docstring, missing-module-docstring
+from pyccel.decorators import device, kernel
+from pyccel import cuda
+
+@device
+def device_call():
+    print("Hello from device")
+
+@kernel
+def kernel_call():
+    device_call()
+
+def f():
+    kernel_call[1,1]()
+    cuda.synchronize()
+
+if __name__ == '__main__':
+    f()
diff --git a/tests/pyccel/test_pyccel.py b/tests/pyccel/test_pyccel.py
index b4757a3c31..2d55c6e1cb 100644
--- a/tests/pyccel/test_pyccel.py
+++ b/tests/pyccel/test_pyccel.py
@@ -742,6 +742,14 @@ def test_kernel_collision(gpu_available):
     pyccel_test("scripts/kernel/kernel_name_collision.py",
             language="cuda", execute_code=gpu_available)
 
+#------------------------------------------------------------------------------
+
+@pytest.mark.cuda
+def test_device_call(gpu_available):
+    types = str
+    pyccel_test("scripts/kernel/device_test.py",
+            language="cuda", output_dtype=types, execute_code=gpu_available)
+
 #------------------------------------------------------------------------------
 def test_print_strings(language):
     types = str

From a28c7247e765743def5294b825a864b7bfd120fe Mon Sep 17 00:00:00 2001
From: smazouz42 <smazouz@student.1337.ma>
Date: Thu, 11 Jul 2024 14:45:50 +0100
Subject: [PATCH 030/130] work in progress

---
 pyccel/ast/cudatypes.py                      |  7 +++--
 pyccel/ast/numpytypes.py                     |  2 --
 pyccel/ast/variable.py                       |  2 +-
 pyccel/codegen/printing/ccode.py             |  2 +-
 pyccel/codegen/printing/cucode.py            | 30 ++++++++++++++------
 pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.cu | 18 ++++++------
 pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.h  | 30 ++++++++++----------
 7 files changed, 52 insertions(+), 39 deletions(-)

diff --git a/pyccel/ast/cudatypes.py b/pyccel/ast/cudatypes.py
index 3e9a8df1cf..df45abdb6e 100644
--- a/pyccel/ast/cudatypes.py
+++ b/pyccel/ast/cudatypes.py
@@ -12,6 +12,7 @@
 from .datatypes import FixedSizeNumericType, HomogeneousContainerType, PythonNativeBool
 from pyccel.utilities.metaclasses import ArgumentSingleton
 from .datatypes import pyccel_type_to_original_type, original_type_to_pyccel_type
+from .numpytypes import NumpyNDArrayType
 
 
 class CudaArrayType(HomogeneousContainerType, metaclass = ArgumentSingleton):
@@ -51,11 +52,13 @@ def __add__(self, other):
         test_type = np.zeros(1, dtype = pyccel_type_to_original_type[self.element_type])
         if isinstance(other, FixedSizeNumericType):
             comparison_type = pyccel_type_to_original_type[other]()
-        elif isinstance(other, CudaArrayType):
+        elif isinstance(other, CudaArrayType) or isinstance(other, NumpyNDArrayType):
             comparison_type = np.zeros(1, dtype = pyccel_type_to_original_type[other.element_type])
         else:
             return NotImplemented
-        # Todo need to check for memory location as well
+        if(isinstance(other, CudaArrayType)):
+            assert self.memory_location == other.memory_location
+
         result_type = original_type_to_pyccel_type[np.result_type(test_type, comparison_type).type]
         rank = max(other.rank, self.rank)
         if rank < 2:
diff --git a/pyccel/ast/numpytypes.py b/pyccel/ast/numpytypes.py
index 1d56ce14e9..8bc1df828e 100644
--- a/pyccel/ast/numpytypes.py
+++ b/pyccel/ast/numpytypes.py
@@ -282,7 +282,6 @@ def __new__(cls, dtype, rank, order):
             return super().__new__(cls)
 
     def __init__(self, dtype, rank, order):
-        # print("reank", rank)
         assert isinstance(rank, int)
         assert order in (None, 'C', 'F')
         assert rank < 2 or order is not None
@@ -320,7 +319,6 @@ def __radd__(self, other):
 
     @lru_cache
     def __and__(self, other):
-        print("jouj draham diyali aba jalal")
         elem_type = self.element_type
         if isinstance(other, FixedSizeNumericType):
             return NumpyNDArrayType(elem_type and other)
diff --git a/pyccel/ast/variable.py b/pyccel/ast/variable.py
index b38dd100f5..c8b9fd95ef 100644
--- a/pyccel/ast/variable.py
+++ b/pyccel/ast/variable.py
@@ -98,7 +98,7 @@ class Variable(TypedAstNode):
     >>> Variable(PythonNativeInt(), DottedName('matrix', 'n_rows'))
     matrix.n_rows
     """
-    __slots__ = ('_name', '_alloc_shape', '_memory_handling', '_memory_location', '_is_const', '_is_target',
+    __slots__ = ('_name', '_alloc_shape', '_memory_handling', '_is_const', '_is_target',
             '_is_optional', '_allows_negative_indexes', '_cls_base', '_is_argument', '_is_temp',
             '_shape','_is_private','_class_type')
     _attribute_nodes = ()
diff --git a/pyccel/codegen/printing/ccode.py b/pyccel/codegen/printing/ccode.py
index ec37735dff..de1ad669c3 100644
--- a/pyccel/codegen/printing/ccode.py
+++ b/pyccel/codegen/printing/ccode.py
@@ -1260,7 +1260,7 @@ def find_in_ndarray_type_registry(self, dtype):
             The code which declares the datatype in C.
         """
         try :
-            return self.ndarray_type_registry[dtype]
+            return self.cuda_ndarray_type_registry[dtype]
         except KeyError:
             raise errors.report(PYCCEL_RESTRICTION_TODO, #pylint: disable=raise-missing-from
                     symbol = dtype,
diff --git a/pyccel/codegen/printing/cucode.py b/pyccel/codegen/printing/cucode.py
index d343272979..8d405f15a4 100644
--- a/pyccel/codegen/printing/cucode.py
+++ b/pyccel/codegen/printing/cucode.py
@@ -16,15 +16,11 @@
 
 from pyccel.errors.errors           import Errors
 from pyccel.ast.cudatypes           import CudaArrayType
-from pyccel.ast.datatypes           import HomogeneousContainerType
+from pyccel.ast.datatypes           import HomogeneousContainerType, PythonNativeBool
 from pyccel.ast.numpytypes          import numpy_precision_map
 from pyccel.ast.cudaext             import CudaFull
-
-
-
-
-
-
+from pyccel.ast.numpytypes          import NumpyFloat32Type, NumpyFloat64Type, NumpyComplex64Type, NumpyComplex128Type
+from pyccel.ast.numpytypes          import NumpyInt8Type, NumpyInt16Type, NumpyInt32Type, NumpyInt64Type
 
 errors = Errors()
 
@@ -52,6 +48,16 @@ class CudaCodePrinter(CCodePrinter):
     """
     language = "cuda"
 
+    cuda_ndarray_type_registry = {
+                    NumpyFloat64Type()    : 'cu_double',
+                    NumpyFloat32Type()    : 'cu_float',
+                    NumpyComplex128Type() : 'cu_cdouble',
+                    NumpyComplex64Type()  : 'cu_cfloat',
+                    NumpyInt64Type()      : 'cu_int64',
+                    NumpyInt32Type()      : 'cu_int32',
+                    NumpyInt16Type()      : 'cu_int16',
+                    NumpyInt8Type()       : 'cu_int8',
+                    PythonNativeBool()    : 'cu_bool'}
     def __init__(self, filename, prefix_module = None):
 
         errors.set_target(filename)
@@ -172,10 +178,18 @@ def _print_Deallocate(self, expr):
         if not isinstance(expr.variable.class_type, CudaArrayType):
             return super()._print_Deallocate(expr)
 
-        if expr.variable.memory_location == 'host':
+        if expr.variable.class_type.memory_location == 'host':
             return f"cuda_free_host({var_code});\n"
         else:
             return f"cuda_free({var_code});\n"
+    def get_declare_type(self, expr):
+        class_type = expr.class_type
+        rank  = expr.rank
+        if not isinstance(class_type, CudaArrayType ) or rank <= 0:
+            return super().get_declare_type(expr)
+
+        dtype = 't_cuda_ndarray'
+        return dtype
 
     def _print_Assign(self, expr):
         rhs = expr.rhs
diff --git a/pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.cu b/pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.cu
index d813540707..0dae780e54 100644
--- a/pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.cu
+++ b/pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.cu
@@ -15,7 +15,7 @@ void    host_memory(void** devPtr, size_t size)
     cudaMallocHost(devPtr, size);
 }
 t_cuda_ndarray   cuda_array_create(enum e_memory_locations location, int32_t nd, int64_t *shape,
-        enum e_types type, bool is_view)
+        enum cu_types type, bool is_view)
 {
     t_cuda_ndarray arr;
     void (*fun_ptr_arr[])(void**, size_t) = {managed_memory, host_memory, device_memory};
@@ -24,25 +24,25 @@ t_cuda_ndarray   cuda_array_create(enum e_memory_locations location, int32_t nd,
     arr.type = type;
     switch (type)
     {
-        case nd_int8:
+        case cu_int8:
             arr.type_size = sizeof(int8_t);
             break;
-        case nd_int16:
+        case cu_int16:
             arr.type_size = sizeof(int16_t);
             break;
-        case nd_int32:
+        case cu_int32:
             arr.type_size = sizeof(int32_t);
             break;
-        case nd_int64:
+        case cu_int64:
             arr.type_size = sizeof(int64_t);
             break;
-        case nd_float:
+        case cu_float:
             arr.type_size = sizeof(float);
             break;
-        case nd_double:
+        case cu_double:
             arr.type_size = sizeof(double);
             break;
-        case nd_bool:
+        case cu_bool:
             arr.type_size = sizeof(bool);
             break;
     }
@@ -69,8 +69,6 @@ int32_t cuda_free_host(t_cuda_ndarray arr)
     arr.raw_data = NULL;
     cudaFree(arr.shape);
     arr.shape = NULL;
-    cudaFree(arr.strides);
-    arr.strides = NULL;
     return (1);
 }
 
diff --git a/pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.h b/pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.h
index 13e8419594..8e8851e0a1 100644
--- a/pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.h
+++ b/pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.h
@@ -4,18 +4,18 @@
 # include <cuda_runtime.h>
 # include <iostream>
 
-typedef enum e_types
+typedef enum cu_types
 {
-        nd_bool     = 0,
-        nd_int8     = 1,
-        nd_int16    = 3,
-        nd_int32    = 5,
-        nd_int64    = 7,
-        nd_float    = 11,
-        nd_double   = 12,
-        nd_cfloat   = 14,
-        nd_cdouble  = 15
-} t_types;
+        cu_bool     = 0,
+        cu_int8     = 1,
+        cu_int16    = 3,
+        cu_int32    = 5,
+        cu_int64    = 7,
+        cu_float    = 11,
+        cu_double   = 12,
+        cu_cfloat   = 14,
+        cu_cdouble  = 15
+} t_cu_types;
 
 
 enum e_memory_locations
@@ -38,7 +38,7 @@ typedef struct  s_cuda_ndarray
     /* shape 'size of each dimension' */
     int64_t                 *shape;
     /* strides 'number of elements to skip to get the next element' */
-    t_types            type;
+    cu_types            type;
     /* type size of the array elements */
     int32_t                 type_size;
     /* number of element in the array */
@@ -52,13 +52,13 @@ typedef struct  s_cuda_ndarray
 }               t_cuda_ndarray;
 
 
-t_cuda_ndarray  cuda_array_create(int32_t nd, int64_t *shape, enum e_types type, bool is_view ,
+t_cuda_ndarray  cuda_array_create(int32_t nd, int64_t *shape, enum cu_types type, bool is_view ,
 enum e_memory_locations location);
 int32_t cuda_free_host(t_cuda_ndarray arr);
-
+__host__ __device__
+int32_t cuda_free(t_cuda_ndarray arr);
 
 
 using namespace std;
 
-
 #endif
\ No newline at end of file

From 22686d77428e47daa99672f9701a189f5382accf Mon Sep 17 00:00:00 2001
From: smazouz42 <smazouz@student.1337.ma>
Date: Sun, 14 Jul 2024 16:50:20 +0100
Subject: [PATCH 031/130] work in progress

---
 pyccel/ast/cudatypes.py | 20 +-------------------
 1 file changed, 1 insertion(+), 19 deletions(-)

diff --git a/pyccel/ast/cudatypes.py b/pyccel/ast/cudatypes.py
index df45abdb6e..29952c3efc 100644
--- a/pyccel/ast/cudatypes.py
+++ b/pyccel/ast/cudatypes.py
@@ -52,7 +52,7 @@ def __add__(self, other):
         test_type = np.zeros(1, dtype = pyccel_type_to_original_type[self.element_type])
         if isinstance(other, FixedSizeNumericType):
             comparison_type = pyccel_type_to_original_type[other]()
-        elif isinstance(other, CudaArrayType) or isinstance(other, NumpyNDArrayType):
+        elif isinstance(other, CudaArrayType) or (isinstance(other, NumpyNDArrayType) and self.memory_location == "host"):
             comparison_type = np.zeros(1, dtype = pyccel_type_to_original_type[other.element_type])
         else:
             return NotImplemented
@@ -69,24 +69,6 @@ def __add__(self, other):
             order = 'F' if other_f_contiguous and self_f_contiguous else 'C'
         return CudaArrayType(result_type, rank, order, self.memory_location)
 
-    @lru_cache
-    def __radd__(self, other):
-        return self.__add__(other)
-
-    @lru_cache
-    def __and__(self, other):
-        elem_type = self.element_type
-        if isinstance(other, FixedSizeNumericType):
-            return CudaArrayType(elem_type and other)
-        elif isinstance(other, CudaArrayType):
-            return CudaArrayType(elem_type+other.element_type)
-        else:
-            return NotImplemented
-
-    @lru_cache
-    def __rand__(self, other):
-        return self.__and__(other)
-
     @property
     def rank(self):
         """

From ab68eb44b1cfa8dab43abe86b5fe726e97298515 Mon Sep 17 00:00:00 2001
From: smazouz42 <smazouz@student.1337.ma>
Date: Sun, 14 Jul 2024 22:18:24 +0100
Subject: [PATCH 032/130] work in progress

---
 pyccel/codegen/printing/ccode.py            | 37 +++++++++++++++++++++
 pyccel/codegen/printing/cucode.py           | 11 +-----
 pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.h | 29 +++++++++++++++-
 3 files changed, 66 insertions(+), 11 deletions(-)

diff --git a/pyccel/codegen/printing/ccode.py b/pyccel/codegen/printing/ccode.py
index 39e0c435f5..8b038fe6e0 100644
--- a/pyccel/codegen/printing/ccode.py
+++ b/pyccel/codegen/printing/ccode.py
@@ -287,6 +287,16 @@ class CCodePrinter(CodePrinter):
                       NumpyInt16Type()      : 'nd_int16',
                       NumpyInt8Type()       : 'nd_int8',
                       PythonNativeBool()    : 'nd_bool'}
+    cuda_ndarray_type_registry = {
+                    NumpyFloat64Type()    : 'cu_double',
+                    NumpyFloat32Type()    : 'cu_float',
+                    NumpyComplex128Type() : 'cu_cdouble',
+                    NumpyComplex64Type()  : 'cu_cfloat',
+                    NumpyInt64Type()      : 'cu_int64',
+                    NumpyInt32Type()      : 'cu_int32',
+                    NumpyInt16Type()      : 'cu_int16',
+                    NumpyInt8Type()       : 'cu_int8',
+                    PythonNativeBool()    : 'cu_bool'}
 
     type_to_format = {(PrimitiveFloatingPointType(),8) : '%.15lf',
                       (PrimitiveFloatingPointType(),4) : '%.6f',
@@ -1257,6 +1267,30 @@ def find_in_ndarray_type_registry(self, dtype):
         type within a ndarray.
         Raise PYCCEL_RESTRICTION_TODO if not found.
 
+        Parameters
+        ----------
+        dtype : DataType
+            The data type of the expression.
+
+        Returns
+        -------
+        str
+            The code which declares the datatype in C.
+        """
+        try :
+            return self.ndarray_type_registry[dtype]
+        except KeyError:
+            raise errors.report(PYCCEL_RESTRICTION_TODO, #pylint: disable=raise-missing-from
+                    symbol = dtype,
+                    severity='fatal')
+    def find_in_cuarray_type_registry(self, dtype):
+        """
+        Find the descriptor for the datatype in the ndarray_type_registry.
+
+        Find the tag which allows the user to access data of the specified
+        type within a ndarray.
+        Raise PYCCEL_RESTRICTION_TODO if not found.
+
         Parameters
         ----------
         dtype : DataType
@@ -1450,9 +1484,12 @@ def _print_IndexedElement(self, expr):
         inds = list(expr.indices)
         base_shape = base.shape
         allow_negative_indexes = expr.allows_negative_indexes
+        
         if isinstance(base.class_type, NumpyNDArrayType):
             #set dtype to the C struct types
             dtype = self.find_in_ndarray_type_registry(expr.dtype)
+        if isinstance(base.class_type, CudaArrayType):
+            dtype = self.find_in_cuarray_type_registry(expr.dtype)
         elif isinstance(base.class_type, HomogeneousContainerType):
             dtype = self.find_in_ndarray_type_registry(numpy_precision_map[(expr.dtype.primitive_type, expr.dtype.precision)])
         else:
diff --git a/pyccel/codegen/printing/cucode.py b/pyccel/codegen/printing/cucode.py
index c3cb7b8d71..d3ebfb2ef2 100644
--- a/pyccel/codegen/printing/cucode.py
+++ b/pyccel/codegen/printing/cucode.py
@@ -48,16 +48,7 @@ class CudaCodePrinter(CCodePrinter):
     """
     language = "cuda"
 
-    cuda_ndarray_type_registry = {
-                    NumpyFloat64Type()    : 'cu_double',
-                    NumpyFloat32Type()    : 'cu_float',
-                    NumpyComplex128Type() : 'cu_cdouble',
-                    NumpyComplex64Type()  : 'cu_cfloat',
-                    NumpyInt64Type()      : 'cu_int64',
-                    NumpyInt32Type()      : 'cu_int32',
-                    NumpyInt16Type()      : 'cu_int16',
-                    NumpyInt8Type()       : 'cu_int8',
-                    PythonNativeBool()    : 'cu_bool'}
+    
     def __init__(self, filename, prefix_module = None):
 
         errors.set_target(filename)
diff --git a/pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.h b/pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.h
index 8e8851e0a1..46eddb6eb1 100644
--- a/pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.h
+++ b/pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.h
@@ -4,6 +4,31 @@
 # include <cuda_runtime.h>
 # include <iostream>
 
+#define GET_INDEX_EXP1(t, arr, a) t(arr, 0, a)
+#define GET_INDEX_EXP2(t, arr, a, b) GET_INDEX_EXP1(t, arr, a) + t(arr, 1, b)
+#define GET_INDEX_EXP3(t, arr, a, b, c) GET_INDEX_EXP2(t, arr, a, b) + t(arr, 2, c)
+#define GET_INDEX_EXP4(t, arr, a, b, c, d) GET_INDEX_EXP3(t, arr, a, b, c) + t(arr, 3, d)
+#define GET_INDEX_EXP5(t, arr, a, b, c, d, e) GET_INDEX_EXP4(t, arr, a, b, c, d) + t(arr, 4, e)
+#define GET_INDEX_EXP6(t, arr, a, b, c, d, e, f) GET_INDEX_EXP5(t, arr, a, b, c, d, e) + t(arr, 5, f)
+#define GET_INDEX_EXP7(t, arr, a, b, c, d, e, f, g) GET_INDEX_EXP6(t, arr, a, b, c, d, e, f) + t(arr, 6, g)
+#define GET_INDEX_EXP8(t, arr, a, b, c, d, e, f, g, h) GET_INDEX_EXP7(t, arr, a, b, c, d, e, f, g) + t(arr, 7, h)
+#define GET_INDEX_EXP9(t, arr, a, b, c, d, e, f, g, h, i) GET_INDEX_EXP8(t, arr, a, b, c, d, e, f, g, h) + t(arr, 8, i)
+#define GET_INDEX_EXP10(t, arr, a, b, c, d, e, f, g, h, i, j) GET_INDEX_EXP9(t, arr, a, b, c, d, e, f, g, h, i) + t(arr, 9, j)
+#define GET_INDEX_EXP11(t, arr, a, b, c, d, e, f, g, h, i, j, k) GET_INDEX_EXP10(t, arr, a, b, c, d, e, f, g, h, i, j) + t(arr, 10, k)
+#define GET_INDEX_EXP12(t, arr, a, b, c, d, e, f, g, h, i, j, k, l) GET_INDEX_EXP11(t, arr, a, b, c, d, e, f, g, h, i, j, k) + t(arr, 11, l)
+#define GET_INDEX_EXP13(t, arr, a, b, c, d, e, f, g, h, i, j, k, l, m) GET_INDEX_EXP12(t, arr, a, b, c, d, e, f, g, h, i, j, k, l) + t(arr, 12, m)
+#define GET_INDEX_EXP14(t, arr, a, b, c, d, e, f, g, h, i, j, k, l, m, n) GET_INDEX_EXP13(t, arr, a, b, c, d, e, f, g, h, i, j, k, l, m) + t(arr, 13, n)
+#define GET_INDEX_EXP15(t, arr, a, b, c, d, e, f, g, h, i, j, k, l, m, n, o) GET_INDEX_EXP14(t, arr, a, b, c, d, e, f, g, h, i, j, k, l, m, n) + t(arr, 14, o)
+
+#define NUM_ARGS_H1(dummy, x15, x14, x13, x12, x11, x10, x9, x8, x7, x6, x5, x4, x3, x2, x1, x0, ...) x0
+#define NUM_ARGS(...) NUM_ARGS_H1(dummy, __VA_ARGS__, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)
+#define GET_INDEX_FUNC_H2(t, arr, ndim, ...) GET_INDEX_EXP##ndim(t, arr, __VA_ARGS__)
+#define GET_INDEX_FUNC(t, arr, ndim, ...) GET_INDEX_FUNC_H2(t, arr, ndim, __VA_ARGS__)
+
+#define GET_INDEX(arr, ...) GET_INDEX_FUNC(INDEX, arr, NUM_ARGS(__VA_ARGS__), __VA_ARGS__)
+#define INDEX(arr, dim, a) (arr.strides[dim] * (a))
+#define GET_ELEMENT(arr, type, ...) arr.type[GET_INDEX(arr, __VA_ARGS__)]
+
 typedef enum cu_types
 {
         cu_bool     = 0,
@@ -38,7 +63,9 @@ typedef struct  s_cuda_ndarray
     /* shape 'size of each dimension' */
     int64_t                 *shape;
     /* strides 'number of elements to skip to get the next element' */
-    cu_types            type;
+    int64_t                 *strides;
+    /* data type of the array elements */
+    t_cu_types            type;
     /* type size of the array elements */
     int32_t                 type_size;
     /* number of element in the array */

From 73c4c81624708d8e056a7b126dcaca131431d075 Mon Sep 17 00:00:00 2001
From: smazouz42 <smazouz@student.1337.ma>
Date: Sun, 14 Jul 2024 22:48:03 +0100
Subject: [PATCH 033/130] work in progress

---
 pyccel/codegen/printing/ccode.py             | 39 -----------
 pyccel/codegen/printing/cucode.py            |  3 +-
 pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.cu | 26 +++----
 pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.h  | 72 ++------------------
 pyccel/stdlib/ndarrays/ndarrays.h            | 15 ++--
 5 files changed, 28 insertions(+), 127 deletions(-)

diff --git a/pyccel/codegen/printing/ccode.py b/pyccel/codegen/printing/ccode.py
index 8b038fe6e0..d81ed35293 100644
--- a/pyccel/codegen/printing/ccode.py
+++ b/pyccel/codegen/printing/ccode.py
@@ -287,16 +287,6 @@ class CCodePrinter(CodePrinter):
                       NumpyInt16Type()      : 'nd_int16',
                       NumpyInt8Type()       : 'nd_int8',
                       PythonNativeBool()    : 'nd_bool'}
-    cuda_ndarray_type_registry = {
-                    NumpyFloat64Type()    : 'cu_double',
-                    NumpyFloat32Type()    : 'cu_float',
-                    NumpyComplex128Type() : 'cu_cdouble',
-                    NumpyComplex64Type()  : 'cu_cfloat',
-                    NumpyInt64Type()      : 'cu_int64',
-                    NumpyInt32Type()      : 'cu_int32',
-                    NumpyInt16Type()      : 'cu_int16',
-                    NumpyInt8Type()       : 'cu_int8',
-                    PythonNativeBool()    : 'cu_bool'}
 
     type_to_format = {(PrimitiveFloatingPointType(),8) : '%.15lf',
                       (PrimitiveFloatingPointType(),4) : '%.6f',
@@ -1283,30 +1273,6 @@ def find_in_ndarray_type_registry(self, dtype):
             raise errors.report(PYCCEL_RESTRICTION_TODO, #pylint: disable=raise-missing-from
                     symbol = dtype,
                     severity='fatal')
-    def find_in_cuarray_type_registry(self, dtype):
-        """
-        Find the descriptor for the datatype in the ndarray_type_registry.
-
-        Find the tag which allows the user to access data of the specified
-        type within a ndarray.
-        Raise PYCCEL_RESTRICTION_TODO if not found.
-
-        Parameters
-        ----------
-        dtype : DataType
-            The data type of the expression.
-
-        Returns
-        -------
-        str
-            The code which declares the datatype in C.
-        """
-        try :
-            return self.cuda_ndarray_type_registry[dtype]
-        except KeyError:
-            raise errors.report(PYCCEL_RESTRICTION_TODO, #pylint: disable=raise-missing-from
-                    symbol = dtype,
-                    severity='fatal')
 
     def get_declare_type(self, expr):
         """
@@ -1356,9 +1322,6 @@ def get_declare_type(self, expr):
                     errors.report(UNSUPPORTED_ARRAY_RANK, symbol=expr, severity='fatal')
                 self.add_import(c_imports['ndarrays'])
                 dtype = 't_ndarray'
-            elif isinstance(expr.class_type, CudaArrayType):
-                dtype = 't_cuda_ndarray'
-             
             else:
                 errors.report(PYCCEL_RESTRICTION_TODO+' (rank>0)', symbol=expr, severity='fatal')
         elif not isinstance(class_type, CustomDataType):
@@ -1488,8 +1451,6 @@ def _print_IndexedElement(self, expr):
         if isinstance(base.class_type, NumpyNDArrayType):
             #set dtype to the C struct types
             dtype = self.find_in_ndarray_type_registry(expr.dtype)
-        if isinstance(base.class_type, CudaArrayType):
-            dtype = self.find_in_cuarray_type_registry(expr.dtype)
         elif isinstance(base.class_type, HomogeneousContainerType):
             dtype = self.find_in_ndarray_type_registry(numpy_precision_map[(expr.dtype.primitive_type, expr.dtype.precision)])
         else:
diff --git a/pyccel/codegen/printing/cucode.py b/pyccel/codegen/printing/cucode.py
index d3ebfb2ef2..6d8a6bc305 100644
--- a/pyccel/codegen/printing/cucode.py
+++ b/pyccel/codegen/printing/cucode.py
@@ -179,8 +179,9 @@ def get_declare_type(self, expr):
         rank  = expr.rank
         if not isinstance(class_type, CudaArrayType ) or rank <= 0:
             return super().get_declare_type(expr)
+        self.add_import(c_imports['ndarrays'])
 
-        dtype = 't_cuda_ndarray'
+        dtype = 't_ndarray '
         return dtype
 
     def _print_Assign(self, expr):
diff --git a/pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.cu b/pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.cu
index 0dae780e54..34890002f3 100644
--- a/pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.cu
+++ b/pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.cu
@@ -14,35 +14,35 @@ void    host_memory(void** devPtr, size_t size)
 {
     cudaMallocHost(devPtr, size);
 }
-t_cuda_ndarray   cuda_array_create(enum e_memory_locations location, int32_t nd, int64_t *shape,
-        enum cu_types type, bool is_view)
+t_ndarray    cuda_array_create(enum e_memory_locations location, int32_t nd, int64_t *shape,
+        enum e_types type, bool is_view)
 {
-    t_cuda_ndarray arr;
+    t_ndarray  arr;
     void (*fun_ptr_arr[])(void**, size_t) = {managed_memory, host_memory, device_memory};
 
     arr.nd = nd;
     arr.type = type;
     switch (type)
     {
-        case cu_int8:
+        case nd_int8:
             arr.type_size = sizeof(int8_t);
             break;
-        case cu_int16:
+        case nd_int16:
             arr.type_size = sizeof(int16_t);
             break;
-        case cu_int32:
+        case nd_int32:
             arr.type_size = sizeof(int32_t);
             break;
-        case cu_int64:
+        case nd_int64:
             arr.type_size = sizeof(int64_t);
             break;
-        case cu_float:
+        case nd_float:
             arr.type_size = sizeof(float);
             break;
-        case cu_double:
+        case nd_double:
             arr.type_size = sizeof(double);
             break;
-        case cu_bool:
+        case nd_bool:
             arr.type_size = sizeof(bool);
             break;
     }
@@ -61,7 +61,7 @@ t_cuda_ndarray   cuda_array_create(enum e_memory_locations location, int32_t nd,
     return (arr);
 }
 
-int32_t cuda_free_host(t_cuda_ndarray arr)
+int32_t cuda_free_host(t_ndarray  arr)
 {
     if (arr.shape == NULL)
         return (0);
@@ -73,7 +73,7 @@ int32_t cuda_free_host(t_cuda_ndarray arr)
 }
 
 __host__ __device__
-int32_t cuda_free(t_cuda_ndarray arr)
+int32_t cuda_free(t_ndarray  arr)
 {
     if (arr.shape == NULL)
         return (0);
@@ -85,7 +85,7 @@ int32_t cuda_free(t_cuda_ndarray arr)
 }
 
 __host__ __device__
-int32_t cuda_free_pointer(t_cuda_ndarray arr)
+int32_t cuda_free_pointer(t_ndarray  arr)
 {
     if (arr.is_view == false || arr.shape == NULL)
         return (0);
diff --git a/pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.h b/pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.h
index 46eddb6eb1..e074443662 100644
--- a/pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.h
+++ b/pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.h
@@ -3,44 +3,8 @@
 
 # include <cuda_runtime.h>
 # include <iostream>
+#include "../ndarrays/ndarrays.h"
 
-#define GET_INDEX_EXP1(t, arr, a) t(arr, 0, a)
-#define GET_INDEX_EXP2(t, arr, a, b) GET_INDEX_EXP1(t, arr, a) + t(arr, 1, b)
-#define GET_INDEX_EXP3(t, arr, a, b, c) GET_INDEX_EXP2(t, arr, a, b) + t(arr, 2, c)
-#define GET_INDEX_EXP4(t, arr, a, b, c, d) GET_INDEX_EXP3(t, arr, a, b, c) + t(arr, 3, d)
-#define GET_INDEX_EXP5(t, arr, a, b, c, d, e) GET_INDEX_EXP4(t, arr, a, b, c, d) + t(arr, 4, e)
-#define GET_INDEX_EXP6(t, arr, a, b, c, d, e, f) GET_INDEX_EXP5(t, arr, a, b, c, d, e) + t(arr, 5, f)
-#define GET_INDEX_EXP7(t, arr, a, b, c, d, e, f, g) GET_INDEX_EXP6(t, arr, a, b, c, d, e, f) + t(arr, 6, g)
-#define GET_INDEX_EXP8(t, arr, a, b, c, d, e, f, g, h) GET_INDEX_EXP7(t, arr, a, b, c, d, e, f, g) + t(arr, 7, h)
-#define GET_INDEX_EXP9(t, arr, a, b, c, d, e, f, g, h, i) GET_INDEX_EXP8(t, arr, a, b, c, d, e, f, g, h) + t(arr, 8, i)
-#define GET_INDEX_EXP10(t, arr, a, b, c, d, e, f, g, h, i, j) GET_INDEX_EXP9(t, arr, a, b, c, d, e, f, g, h, i) + t(arr, 9, j)
-#define GET_INDEX_EXP11(t, arr, a, b, c, d, e, f, g, h, i, j, k) GET_INDEX_EXP10(t, arr, a, b, c, d, e, f, g, h, i, j) + t(arr, 10, k)
-#define GET_INDEX_EXP12(t, arr, a, b, c, d, e, f, g, h, i, j, k, l) GET_INDEX_EXP11(t, arr, a, b, c, d, e, f, g, h, i, j, k) + t(arr, 11, l)
-#define GET_INDEX_EXP13(t, arr, a, b, c, d, e, f, g, h, i, j, k, l, m) GET_INDEX_EXP12(t, arr, a, b, c, d, e, f, g, h, i, j, k, l) + t(arr, 12, m)
-#define GET_INDEX_EXP14(t, arr, a, b, c, d, e, f, g, h, i, j, k, l, m, n) GET_INDEX_EXP13(t, arr, a, b, c, d, e, f, g, h, i, j, k, l, m) + t(arr, 13, n)
-#define GET_INDEX_EXP15(t, arr, a, b, c, d, e, f, g, h, i, j, k, l, m, n, o) GET_INDEX_EXP14(t, arr, a, b, c, d, e, f, g, h, i, j, k, l, m, n) + t(arr, 14, o)
-
-#define NUM_ARGS_H1(dummy, x15, x14, x13, x12, x11, x10, x9, x8, x7, x6, x5, x4, x3, x2, x1, x0, ...) x0
-#define NUM_ARGS(...) NUM_ARGS_H1(dummy, __VA_ARGS__, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)
-#define GET_INDEX_FUNC_H2(t, arr, ndim, ...) GET_INDEX_EXP##ndim(t, arr, __VA_ARGS__)
-#define GET_INDEX_FUNC(t, arr, ndim, ...) GET_INDEX_FUNC_H2(t, arr, ndim, __VA_ARGS__)
-
-#define GET_INDEX(arr, ...) GET_INDEX_FUNC(INDEX, arr, NUM_ARGS(__VA_ARGS__), __VA_ARGS__)
-#define INDEX(arr, dim, a) (arr.strides[dim] * (a))
-#define GET_ELEMENT(arr, type, ...) arr.type[GET_INDEX(arr, __VA_ARGS__)]
-
-typedef enum cu_types
-{
-        cu_bool     = 0,
-        cu_int8     = 1,
-        cu_int16    = 3,
-        cu_int32    = 5,
-        cu_int64    = 7,
-        cu_float    = 11,
-        cu_double   = 12,
-        cu_cfloat   = 14,
-        cu_cdouble  = 15
-} t_cu_types;
 
 
 enum e_memory_locations
@@ -49,41 +13,13 @@ enum e_memory_locations
         allocateMemoryOnDevice
 };
 
-typedef enum e_order
-{
-    order_f,
-    order_c,
-} t_order;
-
-typedef struct  s_cuda_ndarray
-{
-    void            *raw_data;
-    /* number of dimensions */
-    int32_t                 nd;
-    /* shape 'size of each dimension' */
-    int64_t                 *shape;
-    /* strides 'number of elements to skip to get the next element' */
-    int64_t                 *strides;
-    /* data type of the array elements */
-    t_cu_types            type;
-    /* type size of the array elements */
-    int32_t                 type_size;
-    /* number of element in the array */
-    int32_t                 length;
-    /* size of the array */
-    int32_t                 buffer_size;
-    /* True if the array does not own the data */
-    bool                    is_view;
-    /* stores the order of the array: order_f or order_c */
-    t_order            order;
-}               t_cuda_ndarray;
 
 
-t_cuda_ndarray  cuda_array_create(int32_t nd, int64_t *shape, enum cu_types type, bool is_view ,
+t_ndarray   cuda_array_create(int32_t nd, int64_t *shape, enum e_types type, bool is_view ,
 enum e_memory_locations location);
-int32_t cuda_free_host(t_cuda_ndarray arr);
+int32_t cuda_free_host(t_ndarray  arr);
 __host__ __device__
-int32_t cuda_free(t_cuda_ndarray arr);
+int32_t cuda_free(t_ndarray  arr);
 
 
 using namespace std;
diff --git a/pyccel/stdlib/ndarrays/ndarrays.h b/pyccel/stdlib/ndarrays/ndarrays.h
index 082146d639..9764113705 100644
--- a/pyccel/stdlib/ndarrays/ndarrays.h
+++ b/pyccel/stdlib/ndarrays/ndarrays.h
@@ -80,12 +80,6 @@ typedef enum e_order
     order_c,
 } t_order;
 
-enum e_memory_locations
-{
-        managedMemory,
-        allocateMemoryOnHost,
-        allocateMemoryOnDevice
-};
 
 typedef struct  s_ndarray
 {
@@ -135,8 +129,10 @@ void        _array_fill_int64(int64_t c, t_ndarray arr);
 void        _array_fill_float(float c, t_ndarray arr);
 void        _array_fill_double(double c, t_ndarray arr);
 void        _array_fill_bool(bool c, t_ndarray arr);
+#ifndef __NVCC__
 void        _array_fill_cfloat(float complex c, t_ndarray arr);
 void        _array_fill_cdouble(double complex c, t_ndarray arr);
+#endif
 
 /* slicing */
                 /* creating a Slice object */
@@ -156,6 +152,7 @@ int32_t         free_pointer(t_ndarray* dump);
 int64_t         get_index(t_ndarray arr, ...);
 
 /* data converting between numpy and ndarray */
+
 int64_t     *numpy_to_ndarray_strides(int64_t *np_strides, int type_size, int nd);
 int64_t     *numpy_to_ndarray_shape(int64_t *np_shape, int nd);
 void print_ndarray_memory(t_ndarray nd);
@@ -171,8 +168,10 @@ int64_t            numpy_sum_int32(t_ndarray arr);
 int64_t            numpy_sum_int64(t_ndarray arr);
 float              numpy_sum_float32(t_ndarray arr);
 double             numpy_sum_float64(t_ndarray arr);
+#ifndef __NVCC__
 float complex      numpy_sum_complex64(t_ndarray arr);
 double complex     numpy_sum_complex128(t_ndarray arr);
+#endif
 
 /*numpy max/amax */
 
@@ -183,8 +182,10 @@ int64_t            numpy_amax_int32(t_ndarray arr);
 int64_t            numpy_amax_int64(t_ndarray arr);
 float              numpy_amax_float32(t_ndarray arr);
 double             numpy_amax_float64(t_ndarray arr);
+#ifndef __NVCC__
 float complex      numpy_amax_complex64(t_ndarray arr);
 double complex     numpy_amax_complex128(t_ndarray arr);
+#endif
 
 /* numpy min/amin */
 
@@ -195,7 +196,9 @@ int64_t            numpy_amin_int32(t_ndarray arr);
 int64_t            numpy_amin_int64(t_ndarray arr);
 float              numpy_amin_float32(t_ndarray arr);
 double             numpy_amin_float64(t_ndarray arr);
+#ifndef __NVCC__
 float complex      numpy_amin_complex64(t_ndarray arr);
 double complex     numpy_amin_complex128(t_ndarray arr);
+#endif
 
 #endif

From af4d097d481a8f81112b0c9e993bd13d8b8a3f2b Mon Sep 17 00:00:00 2001
From: EmilyBourne <louise.bourne@gmail.com>
Date: Mon, 11 Mar 2024 11:41:27 +0100
Subject: [PATCH 034/130] Trigger tests on push to devel or main branch

---
 .github/workflows/anaconda_linux.yml   | 2 +-
 .github/workflows/anaconda_windows.yml | 2 +-
 .github/workflows/intel.yml            | 2 +-
 .github/workflows/linux.yml            | 2 +-
 .github/workflows/macosx.yml           | 2 +-
 .github/workflows/pickle.yml           | 2 +-
 .github/workflows/pickle_wheel.yml     | 2 +-
 .github/workflows/windows.yml          | 2 +-
 8 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/.github/workflows/anaconda_linux.yml b/.github/workflows/anaconda_linux.yml
index 5a5384e5ce..525903a54f 100644
--- a/.github/workflows/anaconda_linux.yml
+++ b/.github/workflows/anaconda_linux.yml
@@ -28,7 +28,7 @@ env:
 jobs:
   Python_version_picker:
     runs-on: ubuntu-latest
-    if: github.event_name != 'push' || github.repository == 'pyccel/pyccel'
+    if: github.event_name != 'push' || github.repository == 'pyccel/pyccel-cuda'
     outputs:
       python_version: ${{ steps.set-python_version.outputs.python_version }}
     steps:
diff --git a/.github/workflows/anaconda_windows.yml b/.github/workflows/anaconda_windows.yml
index 154a4d01e8..0f3f8a04ed 100644
--- a/.github/workflows/anaconda_windows.yml
+++ b/.github/workflows/anaconda_windows.yml
@@ -28,7 +28,7 @@ env:
 jobs:
   Python_version_picker:
     runs-on: windows-latest
-    if: github.event_name != 'push' || github.repository == 'pyccel/pyccel'
+    if: github.event_name != 'push' || github.repository == 'pyccel/pyccel-cuda'
     outputs:
       python_version: ${{ steps.set-python_version.outputs.python_version }}
     steps:
diff --git a/.github/workflows/intel.yml b/.github/workflows/intel.yml
index 977d5f9afd..5f340e1088 100644
--- a/.github/workflows/intel.yml
+++ b/.github/workflows/intel.yml
@@ -29,7 +29,7 @@ env:
 jobs:
   Python_version_picker:
     runs-on: ubuntu-latest
-    if: github.event_name != 'push' || github.repository == 'pyccel/pyccel'
+    if: github.event_name != 'push' || github.repository == 'pyccel/pyccel-cuda'
     outputs:
       python_version: ${{ steps.set-python_version.outputs.python_version }}
     steps:
diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml
index ad39cee725..664ae3aa60 100644
--- a/.github/workflows/linux.yml
+++ b/.github/workflows/linux.yml
@@ -28,7 +28,7 @@ env:
 jobs:
   matrix_prep:
     runs-on: ubuntu-latest
-    if: github.event_name != 'push' || github.repository == 'pyccel/pyccel'
+    if: github.event_name != 'push' || github.repository == 'pyccel/pyccel-cuda'
     outputs:
       matrix: ${{ steps.set-matrix.outputs.matrix }}
     steps:
diff --git a/.github/workflows/macosx.yml b/.github/workflows/macosx.yml
index 4768a64efa..f51041c0b8 100644
--- a/.github/workflows/macosx.yml
+++ b/.github/workflows/macosx.yml
@@ -28,7 +28,7 @@ env:
 jobs:
   Python_version_picker:
     runs-on: macos-latest
-    if: github.event_name != 'push' || github.repository == 'pyccel/pyccel'
+    if: github.event_name != 'push' || github.repository == 'pyccel/pyccel-cuda'
     outputs:
       python_version: ${{ steps.set-python_version.outputs.python_version }}
     steps:
diff --git a/.github/workflows/pickle.yml b/.github/workflows/pickle.yml
index 052028a5cb..cc3864afd2 100644
--- a/.github/workflows/pickle.yml
+++ b/.github/workflows/pickle.yml
@@ -31,7 +31,7 @@ env:
 jobs:
   Python_version_picker:
     runs-on: ubuntu-latest
-    if: github.event_name != 'push' || github.repository == 'pyccel/pyccel'
+    if: github.event_name != 'push' || github.repository == 'pyccel/pyccel-cuda'
     outputs:
       python_version: ${{ steps.set-matrix.outputs.python_version }}
       matrix: ${{ steps.set-matrix.outputs.matrix }}
diff --git a/.github/workflows/pickle_wheel.yml b/.github/workflows/pickle_wheel.yml
index 1dc82af503..718dc13dcc 100644
--- a/.github/workflows/pickle_wheel.yml
+++ b/.github/workflows/pickle_wheel.yml
@@ -28,7 +28,7 @@ env:
 jobs:
   Python_version_picker:
     runs-on: ubuntu-latest
-    if: github.event_name != 'push' || github.repository == 'pyccel/pyccel'
+    if: github.event_name != 'push' || github.repository == 'pyccel/pyccel-cuda'
     outputs:
       python_version: ${{ steps.set-python_version.outputs.python_version }}
     steps:
diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml
index 60c560ffee..827038a279 100644
--- a/.github/workflows/windows.yml
+++ b/.github/workflows/windows.yml
@@ -28,7 +28,7 @@ env:
 jobs:
   Python_version_picker:
     runs-on: windows-latest
-    if: github.event_name != 'push' || github.repository == 'pyccel/pyccel'
+    if: github.event_name != 'push' || github.repository == 'pyccel/pyccel-cuda'
     outputs:
       python_version: ${{ steps.set-python_version.outputs.python_version }}
     steps:

From 061996e13c8b8d7e0723a5a9f7fd12c50f1efd63 Mon Sep 17 00:00:00 2001
From: EmilyBourne <louise.bourne@gmail.com>
Date: Mon, 11 Mar 2024 11:46:33 +0100
Subject: [PATCH 035/130] Add cuda workflow to test cuda developments on CI

---
 .github/actions/coverage_install/action.yml |  2 +-
 .github/actions/linux_install/action.yml    | 10 +--
 .github/actions/pytest_run/action.yml       |  4 +-
 .github/actions/pytest_run_cuda/action.yml  | 17 +++++
 .github/actions/python_install/action.yml   | 17 +++++
 .github/workflows/cuda.yml                  | 83 +++++++++++++++++++++
 ci_tools/bot_messages/show_tests.txt        |  1 +
 ci_tools/bot_tools/bot_funcs.py             | 12 +--
 ci_tools/devel_branch_tests.py              |  1 +
 ci_tools/json_pytest_output.py              |  2 +-
 10 files changed, 135 insertions(+), 14 deletions(-)
 create mode 100644 .github/actions/pytest_run_cuda/action.yml
 create mode 100644 .github/actions/python_install/action.yml
 create mode 100644 .github/workflows/cuda.yml

diff --git a/.github/actions/coverage_install/action.yml b/.github/actions/coverage_install/action.yml
index ac5294e542..5732baee34 100644
--- a/.github/actions/coverage_install/action.yml
+++ b/.github/actions/coverage_install/action.yml
@@ -15,7 +15,7 @@ runs:
     - name: Directory Creation
       run: |
         INSTALL_DIR=$(cd tests; python -c "import pyccel; print(pyccel.__path__[0])")
-        SITE_DIR=$(python -c 'import sysconfig; print(sysconfig.get_paths()["purelib"])')
+        SITE_DIR=$(dirname ${INSTALL_DIR})
         echo -e "import coverage; coverage.process_startup()" > ${SITE_DIR}/pyccel_cov.pth
         echo -e "[run]\nparallel = True\nsource = ${INSTALL_DIR}\ndata_file = $(pwd)/.coverage\n[report]\ninclude = ${INSTALL_DIR}/*\n[xml]\noutput = cobertura.xml" > .coveragerc
         echo "SITE_DIR=${SITE_DIR}" >> $GITHUB_ENV
diff --git a/.github/actions/linux_install/action.yml b/.github/actions/linux_install/action.yml
index 8fb5cd8505..0ef9a69b8e 100644
--- a/.github/actions/linux_install/action.yml
+++ b/.github/actions/linux_install/action.yml
@@ -9,22 +9,22 @@ runs:
       shell: bash
     - name: Install fortran
       run:
-        sudo apt-get install gfortran
+        sudo apt-get install -y gfortran
       shell: bash
     - name: Install LaPack
       run:
-        sudo apt-get install libblas-dev liblapack-dev
+        sudo apt-get install -y libblas-dev liblapack-dev
       shell: bash
     - name: Install MPI
       run: |
-        sudo apt-get install libopenmpi-dev openmpi-bin
+        sudo apt-get install -y libopenmpi-dev openmpi-bin
         echo "MPI_OPTS=--oversubscribe" >> $GITHUB_ENV
       shell: bash
     - name: Install OpenMP
       run:
-        sudo apt-get install libomp-dev libomp5
+        sudo apt-get install -y libomp-dev libomp5
       shell: bash
     - name: Install Valgrind
       run:
-        sudo apt-get install valgrind
+        sudo apt-get install -y valgrind
       shell: bash
diff --git a/.github/actions/pytest_run/action.yml b/.github/actions/pytest_run/action.yml
index 0b6f0f988d..b0bdc31f16 100644
--- a/.github/actions/pytest_run/action.yml
+++ b/.github/actions/pytest_run/action.yml
@@ -51,13 +51,13 @@ runs:
       working-directory: ./tests
       id: pytest_3
     - name: Test Fortran translations
-      run: python -m pytest -n auto -rX ${FLAGS} -m "not (parallel or xdist_incompatible) and not (c or python) ${{ inputs.pytest_mark }}" --ignore=ndarrays 2>&1 | tee s4_outfile.out
+      run: python -m pytest -n auto -rX ${FLAGS} -m "not (parallel or xdist_incompatible) and not (c or python or ccuda) ${{ inputs.pytest_mark }}" --ignore=ndarrays 2>&1 | tee s4_outfile.out
       shell: ${{ inputs.shell_cmd }}
       working-directory: ./tests
       id: pytest_4
     - name: Test multi-file Fortran translations
       run: |
-        python -m pytest -rX ${FLAGS} -m "xdist_incompatible and not parallel and not (c or python) ${{ inputs.pytest_mark }}" --ignore=ndarrays 2>&1 | tee s5_outfile.out
+        python -m pytest -rX ${FLAGS} -m "xdist_incompatible and not parallel and not (c or python or ccuda) ${{ inputs.pytest_mark }}" --ignore=ndarrays 2>&1 | tee s5_outfile.out
         pyccel-clean
       shell: ${{ inputs.shell_cmd }}
       working-directory: ./tests
diff --git a/.github/actions/pytest_run_cuda/action.yml b/.github/actions/pytest_run_cuda/action.yml
new file mode 100644
index 0000000000..52092a6e02
--- /dev/null
+++ b/.github/actions/pytest_run_cuda/action.yml
@@ -0,0 +1,17 @@
+name: 'Pyccel pytest commands generating Ccuda'
+inputs:
+  shell_cmd:
+    description: 'Specifies the shell command (different for anaconda)'
+    required: false
+    default: "bash"
+
+runs:
+  using: "composite"
+  steps:
+    - name: Ccuda tests with pytest
+      run: |
+        # Catch exit 5 (no tests found)
+        sh -c 'python -m pytest -n auto -rx -m "not (parallel or xdist_incompatible) and ccuda" --ignore=symbolic --ignore=ndarrays; ret=$?; [ $ret = 5 ] && exit 0 || exit $ret'
+        pyccel-clean
+      shell: ${{ inputs.shell_cmd }}
+      working-directory: ./tests
diff --git a/.github/actions/python_install/action.yml b/.github/actions/python_install/action.yml
new file mode 100644
index 0000000000..f9b720e3e1
--- /dev/null
+++ b/.github/actions/python_install/action.yml
@@ -0,0 +1,17 @@
+name: 'Python installation commands'
+
+runs:
+  using: "composite"
+  steps:
+    - name: Install python
+      run:
+        sudo apt-get -y install python3-dev
+      shell: bash
+    - name: python as python3
+      run:
+        sudo apt-get -y install python-is-python3
+      shell: bash
+    - name: Install Pip
+      run:
+        sudo apt-get -y install python3-pip
+      shell: bash
diff --git a/.github/workflows/cuda.yml b/.github/workflows/cuda.yml
new file mode 100644
index 0000000000..833ebf5d85
--- /dev/null
+++ b/.github/workflows/cuda.yml
@@ -0,0 +1,83 @@
+name: Cuda unit tests
+
+on:
+  workflow_dispatch:
+    inputs:
+      python_version:
+        required: false
+        type: string
+      ref:
+        required: false
+        type: string
+      check_run_id:
+        required: false
+        type: string
+      pr_repo:
+        required: false
+        type: string
+  push:
+    branches: [devel, main]
+
+env:
+  COMMIT: ${{ inputs.ref || github.event.ref }}
+  PEM: ${{ secrets.BOT_PEM }}
+  GITHUB_RUN_ID: ${{ github.run_id }}
+  GITHUB_CHECK_RUN_ID: ${{ inputs.check_run_id }}
+  PR_REPO: ${{ inputs.pr_repo || github.repository }}
+
+jobs:
+  Cuda:
+
+    runs-on: ubuntu-20.04
+    name: Unit tests
+
+    container: nvidia/cuda:11.7.1-devel-ubuntu20.04
+    steps:
+      - uses: actions/checkout@v3
+        with:
+          ref: ${{ env.COMMIT }}
+          repository: ${{ env.PR_REPO }}
+      - name: Prepare docker
+        run: |
+          apt update && apt install sudo
+          TZ=Europe/France
+          ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
+          DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends tzdata
+        shell: bash
+      - name: Install python (setup-python action doesn't work with containers)
+        uses: ./.github/actions/python_install
+      - name: "Setup"
+        id: token
+        run: |
+          pip install jwt requests
+          python ci_tools/setup_check_run.py cuda
+      - name: CUDA Version
+        run: nvcc --version # cuda install check
+      - name: Install dependencies
+        uses: ./.github/actions/linux_install
+      - name: Install Pyccel with tests
+        run: |
+            PATH=${PATH}:$HOME/.local/bin
+            echo "PATH=${PATH}" >> $GITHUB_ENV
+            python -m pip install --upgrade pip
+            python -m pip install --user .[test]
+        shell: bash
+      - name: Coverage install
+        uses: ./.github/actions/coverage_install
+      - name: Ccuda tests with pytest
+        id: cuda_pytest
+        uses: ./.github/actions/pytest_run_cuda
+      - name: Collect coverage information
+        continue-on-error: True
+        uses: ./.github/actions/coverage_collection
+      - name: Save code coverage report
+        uses: actions/upload-artifact@v3
+        with:
+          name: coverage-artifact
+          path: .coverage
+          retention-days: 1
+      - name: "Post completed"
+        if: always()
+        run:
+          python ci_tools/complete_check_run.py ${{ steps.cuda_pytest.outcome }}
+
diff --git a/ci_tools/bot_messages/show_tests.txt b/ci_tools/bot_messages/show_tests.txt
index adc07e8431..eb15492d2e 100644
--- a/ci_tools/bot_messages/show_tests.txt
+++ b/ci_tools/bot_messages/show_tests.txt
@@ -2,6 +2,7 @@ The following is a list of keywords which can be used to run tests. Tests in bol
 - **linux** : Runs the unit tests on a Linux system.
 - **windows** : Runs the unit tests on a Windows system.
 - **macosx** : Runs the unit tests on a MacOS X system.
+- **cuda** : Runs the cuda unit tests on a Linux system.
 - **coverage** : Runs the unit tests on a Linux system and checks the coverage of the tests.
 - **docs** : Checks if the documentation follows the numpydoc format.
 - **pylint** : Runs pylint on files which are too big to be handled by codacy.
diff --git a/ci_tools/bot_tools/bot_funcs.py b/ci_tools/bot_tools/bot_funcs.py
index 7084a01bb9..1621d1d089 100644
--- a/ci_tools/bot_tools/bot_funcs.py
+++ b/ci_tools/bot_tools/bot_funcs.py
@@ -23,7 +23,8 @@
         'pyccel_lint': '3.8',
         'pylint': '3.8',
         'spelling': '3.8',
-        'windows': '3.8'
+        'windows': '3.8',
+        'cuda': '-'
         }
 
 test_names = {
@@ -40,15 +41,16 @@
         'pyccel_lint': "Pyccel best practices",
         'pylint': "Python linting",
         'spelling': "Spelling verification",
-        'windows': "Unit tests on Windows"
+        'windows': "Unit tests on Windows",
+        'cuda': "Unit tests on Linux with cuda"
         }
 
-test_dependencies = {'coverage':['linux']}
+test_dependencies = {'coverage':['linux', 'cuda']}
 
 tests_with_base = ('coverage', 'docs', 'pyccel_lint', 'pylint')
 
 pr_test_keys = ('linux', 'windows', 'macosx', 'coverage', 'docs', 'pylint',
-                'pyccel_lint', 'spelling')
+                'pyccel_lint', 'spelling', 'cuda')
 
 review_stage_labels = ["needs_initial_review", "Ready_for_review", "Ready_to_merge"]
 
@@ -420,7 +422,7 @@ def is_test_required(self, commit_log, name, key, state):
             True if the test should be run, False otherwise.
         """
         print("Checking : ", name, key)
-        if key in ('linux', 'windows', 'macosx', 'anaconda_linux', 'anaconda_windows', 'intel'):
+        if key in ('linux', 'windows', 'macosx', 'anaconda_linux', 'anaconda_windows', 'intel', 'cuda'):
             has_relevant_change = lambda diff: any((f.startswith('pyccel/') or f.startswith('tests/')) #pylint: disable=unnecessary-lambda-assignment
                                                     and f.endswith('.py') and f != 'pyccel/version.py'
                                                     for f in diff)
diff --git a/ci_tools/devel_branch_tests.py b/ci_tools/devel_branch_tests.py
index 1102ef9e92..ec67b6c49a 100644
--- a/ci_tools/devel_branch_tests.py
+++ b/ci_tools/devel_branch_tests.py
@@ -15,3 +15,4 @@
     bot.run_tests(['anaconda_linux'], '3.10', force_run = True)
     bot.run_tests(['anaconda_windows'], '3.10', force_run = True)
     bot.run_tests(['intel'], '3.9', force_run = True)
+    bot.run_tests(['cuda'], '-', force_run = True)
diff --git a/ci_tools/json_pytest_output.py b/ci_tools/json_pytest_output.py
index 409ae76d72..b84f4a4c09 100644
--- a/ci_tools/json_pytest_output.py
+++ b/ci_tools/json_pytest_output.py
@@ -61,7 +61,7 @@ def     mini_md_summary(title, outcome, failed_tests):
     summary = ""
 
     failed_pattern = re.compile(r".*FAILED.*")
-    languages = ('c', 'fortran', 'python')
+    languages = ('c', 'fortran', 'python', 'cuda')
     pattern = {lang: re.compile(r".*\["+lang+r"\]\ \_.*") for lang in languages}
 
     for i in p_args.tests:

From 32457ccf514fd77d537a5a56d84ffaf58ef89b43 Mon Sep 17 00:00:00 2001
From: EmilyBourne <louise.bourne@gmail.com>
Date: Mon, 11 Mar 2024 11:41:27 +0100
Subject: [PATCH 036/130] Trigger tests on push to devel or main branch

---
 .github/workflows/deploy.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml
index 9111b47d52..cf52b1c624 100644
--- a/.github/workflows/deploy.yml
+++ b/.github/workflows/deploy.yml
@@ -10,7 +10,7 @@ jobs:
   waitForWorklows:
     name: Wait for workflows
     runs-on: ubuntu-latest
-    if: github.event.workflow_run.head_branch == 'main'
+    if: github.event.workflow_run.head_branch == 'main' && github.repository == 'pyccel/pyccel'
     steps:
       - name: Checkout repository
         uses: actions/checkout@v4

From 9f0388997a8341657e141fbabced9bb8d895100c Mon Sep 17 00:00:00 2001
From: bauom <40796259+bauom@users.noreply.github.com>
Date: Wed, 28 Feb 2024 18:11:50 +0100
Subject: [PATCH 037/130] [init] Adding CUDA language/compiler and CodePrinter
 (#32)

This PR aims to make the C code compilable with nvcc. To that end, it adds the `cuda` language and a `CudaCodePrinter`.

Changes to stdlib:

Expressions using complex types are wrapped in an `#ifndef __NVCC__` guard so that the nvcc compiler does not process them.

---------

Co-authored-by: Mouad Elalj, EmilyBourne
---
 .dict_custom.txt                           |   1 +
 .github/actions/pytest_parallel/action.yml |   4 +-
 .github/actions/pytest_run/action.yml      |   4 +-
 .github/actions/pytest_run_cuda/action.yml |  11 +-
 CHANGELOG.md                               |   6 +
 pyccel/codegen/codegen.py                  |   8 +-
 pyccel/codegen/compiling/compilers.py      |   5 +-
 pyccel/codegen/pipeline.py                 |   5 +-
 pyccel/codegen/printing/cucode.py          |  74 +++++++++++
 pyccel/commands/console.py                 |   2 +-
 pyccel/compilers/default_compilers.py      |  13 +-
 pyccel/naming/__init__.py                  |   4 +-
 pyccel/naming/cudanameclashchecker.py      |  92 ++++++++++++++
 pyccel/stdlib/numpy/numpy_c.c              |   2 +
 pyccel/stdlib/numpy/numpy_c.h              |   2 +
 pytest.ini                                 |   1 +
 tests/conftest.py                          |  11 ++
 tests/epyccel/test_base.py                 | 136 ++++++++++-----------
 18 files changed, 298 insertions(+), 83 deletions(-)
 create mode 100644 pyccel/codegen/printing/cucode.py
 create mode 100644 pyccel/naming/cudanameclashchecker.py

diff --git a/.dict_custom.txt b/.dict_custom.txt
index 82a6b10d31..ae99f31ed4 100644
--- a/.dict_custom.txt
+++ b/.dict_custom.txt
@@ -110,6 +110,7 @@ Valgrind
 variadic
 subclasses
 oneAPI
+Cuda
 getter
 setter
 bitwise
diff --git a/.github/actions/pytest_parallel/action.yml b/.github/actions/pytest_parallel/action.yml
index c7c77d99c7..f91d84915b 100644
--- a/.github/actions/pytest_parallel/action.yml
+++ b/.github/actions/pytest_parallel/action.yml
@@ -10,8 +10,8 @@ runs:
   steps:
     - name: Test with pytest
       run: |
-        mpiexec -n 4 ${MPI_OPTS} python -m pytest epyccel/test_parallel_epyccel.py -v -m parallel -rXx
-        #mpiexec -n 4 ${MPI_OPTS} python -m pytest epyccel -v -m parallel -rXx
+        mpiexec -n 4 ${MPI_OPTS} python -m pytest epyccel/test_parallel_epyccel.py -v -m "parallel and not cuda" -rXx
+        #mpiexec -n 4 ${MPI_OPTS} python -m pytest epyccel -v -m "parallel and not cuda" -rXx
       shell: ${{ inputs.shell_cmd }}
       working-directory: ./tests
 
diff --git a/.github/actions/pytest_run/action.yml b/.github/actions/pytest_run/action.yml
index b0bdc31f16..451fa39e92 100644
--- a/.github/actions/pytest_run/action.yml
+++ b/.github/actions/pytest_run/action.yml
@@ -51,13 +51,13 @@ runs:
       working-directory: ./tests
       id: pytest_3
     - name: Test Fortran translations
-      run: python -m pytest -n auto -rX ${FLAGS} -m "not (parallel or xdist_incompatible) and not (c or python or ccuda) ${{ inputs.pytest_mark }}" --ignore=ndarrays 2>&1 | tee s4_outfile.out
+      run: python -m pytest -n auto -rX ${FLAGS} -m "not (parallel or xdist_incompatible) and not (c or python or cuda) ${{ inputs.pytest_mark }}" --ignore=ndarrays 2>&1 | tee s4_outfile.out
       shell: ${{ inputs.shell_cmd }}
       working-directory: ./tests
       id: pytest_4
     - name: Test multi-file Fortran translations
       run: |
-        python -m pytest -rX ${FLAGS} -m "xdist_incompatible and not parallel and not (c or python or ccuda) ${{ inputs.pytest_mark }}" --ignore=ndarrays 2>&1 | tee s5_outfile.out
+        python -m pytest -rX ${FLAGS} -m "xdist_incompatible and not parallel and not (c or python or cuda) ${{ inputs.pytest_mark }}" --ignore=ndarrays 2>&1 | tee s5_outfile.out
         pyccel-clean
       shell: ${{ inputs.shell_cmd }}
       working-directory: ./tests
diff --git a/.github/actions/pytest_run_cuda/action.yml b/.github/actions/pytest_run_cuda/action.yml
index 52092a6e02..46f90552ed 100644
--- a/.github/actions/pytest_run_cuda/action.yml
+++ b/.github/actions/pytest_run_cuda/action.yml
@@ -1,4 +1,4 @@
-name: 'Pyccel pytest commands generating Ccuda'
+name: 'Pyccel pytest commands generating Cuda'
 inputs:
   shell_cmd:
     description: 'Specifies the shell command (different for anaconda)'
@@ -11,7 +11,14 @@ runs:
     - name: Ccuda tests with pytest
       run: |
         # Catch exit 5 (no tests found)
-        sh -c 'python -m pytest -n auto -rx -m "not (parallel or xdist_incompatible) and ccuda" --ignore=symbolic --ignore=ndarrays; ret=$?; [ $ret = 5 ] && exit 0 || exit $ret'
+        python -m pytest -rX ${FLAGS} -m "not (xdist_incompatible or parallel) and cuda ${{ inputs.pytest_mark }}" --ignore=symbolic --ignore=ndarrays 2>&1 | tee s1_outfile.out
         pyccel-clean
       shell: ${{ inputs.shell_cmd }}
       working-directory: ./tests
+      id: pytest_1
+    - name: Final step
+      if: always()
+      id: status
+      run:
+        python ci_tools/json_pytest_output.py -t "Cuda Test Summary" --tests "Cuda tests:${{ steps.pytest_1.outcome }}:tests/s1_outfile.out"
+      shell: ${{ inputs.shell_cmd }}
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 695dc72cf7..d6928b0eca 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,6 +1,12 @@
 # Change Log
 All notable changes to this project will be documented in this file.
 
+## \[Cuda - UNRELEASED\]
+
+### Added
+
+-   #32 : add support for `nvcc` Compiler and `cuda` language as a possible option.
+
 ## \[UNRELEASED\]
 
 ### Added
diff --git a/pyccel/codegen/codegen.py b/pyccel/codegen/codegen.py
index daf4559df4..8d4abb6bdb 100644
--- a/pyccel/codegen/codegen.py
+++ b/pyccel/codegen/codegen.py
@@ -9,16 +9,18 @@
 from pyccel.codegen.printing.fcode  import FCodePrinter
 from pyccel.codegen.printing.ccode  import CCodePrinter
 from pyccel.codegen.printing.pycode import PythonCodePrinter
+from pyccel.codegen.printing.cucode import CudaCodePrinter
 
 from pyccel.ast.core      import FunctionDef, Interface, ModuleHeader
 from pyccel.utilities.stage import PyccelStage
 
-_extension_registry = {'fortran': 'f90', 'c':'c',  'python':'py'}
-_header_extension_registry = {'fortran': None, 'c':'h',  'python':None}
+_extension_registry = {'fortran': 'f90', 'c':'c',  'python':'py', 'cuda':'cu'}
+_header_extension_registry = {'fortran': None, 'c':'h',  'python':None, 'cuda':'h'}
 printer_registry    = {
                         'fortran':FCodePrinter,
                         'c':CCodePrinter,
-                        'python':PythonCodePrinter
+                        'python':PythonCodePrinter,
+                        'cuda':CudaCodePrinter
                       }
 
 pyccel_stage = PyccelStage()
diff --git a/pyccel/codegen/compiling/compilers.py b/pyccel/codegen/compiling/compilers.py
index c866ee5b1a..d909a5036e 100644
--- a/pyccel/codegen/compiling/compilers.py
+++ b/pyccel/codegen/compiling/compilers.py
@@ -444,7 +444,10 @@ def compile_shared_library(self, compile_obj, output_folder, verbose = False, sh
         # Collect compile information
         exec_cmd, includes, libs_flags, libdirs_flags, m_code = \
                 self._get_compile_components(compile_obj, accelerators)
-        linker_libdirs_flags = ['-Wl,-rpath' if l == '-L' else l for l in libdirs_flags]
+        if self._info['exec'] == 'nvcc':
+            linker_libdirs_flags = ['-Xcompiler' if l == '-L' else f'"-Wl,-rpath,{l}"' for l in libdirs_flags]
+        else:
+            linker_libdirs_flags = ['-Wl,-rpath' if l == '-L' else l for l in libdirs_flags]
 
         flags.insert(0,"-shared")
 
diff --git a/pyccel/codegen/pipeline.py b/pyccel/codegen/pipeline.py
index 14087fb567..eb357fab74 100644
--- a/pyccel/codegen/pipeline.py
+++ b/pyccel/codegen/pipeline.py
@@ -180,9 +180,10 @@ def handle_error(stage):
     if language is None:
         language = 'fortran'
 
-    # Choose Fortran compiler
+    # Choose Default compiler
     if compiler is None:
-        compiler = os.environ.get('PYCCEL_DEFAULT_COMPILER', 'GNU')
+        default_compiler_family = 'nvidia' if language == 'cuda' else 'GNU'
+        compiler = os.environ.get('PYCCEL_DEFAULT_COMPILER', default_compiler_family)
 
     fflags = [] if fflags is None else fflags.split()
     wrapper_flags = [] if wrapper_flags is None else wrapper_flags.split()
diff --git a/pyccel/codegen/printing/cucode.py b/pyccel/codegen/printing/cucode.py
new file mode 100644
index 0000000000..86146b065b
--- /dev/null
+++ b/pyccel/codegen/printing/cucode.py
@@ -0,0 +1,74 @@
+# coding: utf-8
+#------------------------------------------------------------------------------------------#
+# This file is part of Pyccel which is released under MIT License. See the LICENSE file or #
+# go to https://github.com/pyccel/pyccel/blob/master/LICENSE for full license details.     #
+#------------------------------------------------------------------------------------------#
+"""
+Provide tools for generating and handling CUDA code.
+This module is designed to interface Pyccel's Abstract Syntax Tree (AST) with CUDA,
+enabling the direct translation of high-level Pyccel expressions into CUDA code.
+"""
+
+from pyccel.codegen.printing.ccode import CCodePrinter, c_library_headers
+
+from pyccel.ast.core        import Import, Module
+
+from pyccel.errors.errors   import Errors
+
+
+errors = Errors()
+
+__all__ = ["CudaCodePrinter"]
+
+class CudaCodePrinter(CCodePrinter):
+    """
+    Print code in CUDA format.
+
+    This printer converts Pyccel's Abstract Syntax Tree (AST) into strings of CUDA code.
+    Navigation through this file utilizes _print_X functions,
+    as is common with all printers.
+
+    Parameters
+    ----------
+    filename : str
+            The name of the file being pyccelised.
+    prefix_module : str
+            A prefix to be added to the name of the module.
+    """
+    language = "cuda"
+
+    def __init__(self, filename, prefix_module = None):
+
+        errors.set_target(filename)
+
+        super().__init__(filename)
+
+    def _print_Module(self, expr):
+        self.set_scope(expr.scope)
+        self._current_module = expr.name
+        body = ''.join(self._print(i) for i in expr.body)
+
+        global_variables = ''.join(self._print(d) for d in expr.declarations)
+
+        # Print imports last to be sure that all additional_imports have been collected
+        imports = [Import(expr.name, Module(expr.name,(),())), *self._additional_imports.values()]
+        c_headers_imports = ''
+        local_imports = ''
+
+        for imp in imports:
+            if imp.source in c_library_headers:
+                c_headers_imports += self._print(imp)
+            else:
+                local_imports += self._print(imp)
+
+        imports = f'{c_headers_imports}\
+                    extern "C"{{\n\
+                    {local_imports}\
+                    }}'
+
+        code = f'{imports}\n\
+                 {global_variables}\n\
+                 {body}\n'
+
+        self.exit_scope()
+        return code
diff --git a/pyccel/commands/console.py b/pyccel/commands/console.py
index 596c440ec0..fcbec009de 100644
--- a/pyccel/commands/console.py
+++ b/pyccel/commands/console.py
@@ -80,7 +80,7 @@ def pyccel(files=None, mpi=None, openmp=None, openacc=None, output_dir=None, com
     # ... backend compiler options
     group = parser.add_argument_group('Backend compiler options')
 
-    group.add_argument('--language', choices=('fortran', 'c', 'python'), help='Generated language')
+    group.add_argument('--language', choices=('fortran', 'c', 'python', 'cuda'), help='Generated language')
 
     group.add_argument('--compiler', help='Compiler family or json file containing a compiler description {GNU,intel,PGI}')
 
diff --git a/pyccel/compilers/default_compilers.py b/pyccel/compilers/default_compilers.py
index 166085d22e..d47856773c 100644
--- a/pyccel/compilers/default_compilers.py
+++ b/pyccel/compilers/default_compilers.py
@@ -185,6 +185,15 @@
                 },
             'family': 'nvidia',
             }
+#------------------------------------------------------------
+nvcc_info = {'exec'         : 'nvcc',
+             'language'     : 'cuda',
+             'debug_flags'  : ("-g",),
+             'release_flags': ("-O3",),
+             'general_flags': ('--compiler-options', '-fPIC',),
+             'family'       : 'nvidia'
+            }
+
 
 #------------------------------------------------------------
 def change_to_lib_flag(lib):
@@ -288,6 +297,7 @@ def change_to_lib_flag(lib):
 pgfortran_info.update(python_info)
 nvc_info.update(python_info)
 nvfort_info.update(python_info)
+nvcc_info.update(python_info)
 
 available_compilers = {('GNU', 'c') : gcc_info,
                        ('GNU', 'fortran') : gfort_info,
@@ -296,6 +306,7 @@ def change_to_lib_flag(lib):
                        ('PGI', 'c') : pgcc_info,
                        ('PGI', 'fortran') : pgfortran_info,
                        ('nvidia', 'c') : nvc_info,
-                       ('nvidia', 'fortran') : nvfort_info}
+                       ('nvidia', 'fortran') : nvfort_info,
+                       ('nvidia', 'cuda'): nvcc_info}
 
 vendors = ('GNU','intel','PGI','nvidia')
diff --git a/pyccel/naming/__init__.py b/pyccel/naming/__init__.py
index 72c318d3ad..b3e4bbbe0e 100644
--- a/pyccel/naming/__init__.py
+++ b/pyccel/naming/__init__.py
@@ -10,7 +10,9 @@
 from .fortrannameclashchecker import FortranNameClashChecker
 from .cnameclashchecker import CNameClashChecker
 from .pythonnameclashchecker import PythonNameClashChecker
+from .cudanameclashchecker import CudaNameClashChecker
 
 name_clash_checkers = {'fortran':FortranNameClashChecker(),
         'c':CNameClashChecker(),
-        'python':PythonNameClashChecker()}
+        'python':PythonNameClashChecker(),
+        'cuda':CudaNameClashChecker()}
diff --git a/pyccel/naming/cudanameclashchecker.py b/pyccel/naming/cudanameclashchecker.py
new file mode 100644
index 0000000000..971204e912
--- /dev/null
+++ b/pyccel/naming/cudanameclashchecker.py
@@ -0,0 +1,92 @@
+# coding: utf-8
+#------------------------------------------------------------------------------------------#
+# This file is part of Pyccel which is released under MIT License. See the LICENSE file or #
+# go to https://github.com/pyccel/pyccel/blob/master/LICENSE for full license details.     #
+#------------------------------------------------------------------------------------------#
+"""
+Handles name clash problems in Cuda
+"""
+from .languagenameclashchecker import LanguageNameClashChecker
+
+class CudaNameClashChecker(LanguageNameClashChecker):
+    """
+    Class containing functions to help avoid problematic names in Cuda.
+
+    A class which provides functionalities to check or propose variable names and
+    verify that they do not cause name clashes. Name clashes may be due to
+    new variables, or due to the use of reserved keywords.
+    """
+    # Keywords as mentioned on https://en.cppreference.com/w/c/keyword
+    keywords = set(['isign', 'fsign', 'csign', 'auto', 'break', 'case', 'char', 'const',
+        'continue', 'default', 'do', 'double', 'else', 'enum',
+        'extern', 'float', 'for', 'goto', 'if', 'inline', 'int',
+        'long', 'register', 'restrict', 'return', 'short', 'signed',
+        'sizeof', 'static', 'struct', 'switch', 'typedef', 'union',
+        'unsigned', 'void', 'volatile', 'while', '_Alignas',
+        '_Alignof', '_Atomic', '_Bool', '_Complex', '_Decimal128',
+        '_Decimal32', '_Decimal64', '_Generic', '_Imaginary',
+        '_Noreturn', '_Static_assert', '_Thread_local', 't_ndarray',
+        'array_create', 'new_slice', 'array_slicing', 'alias_assign',
+        'transpose_alias_assign', 'array_fill', 't_slice',
+        'GET_INDEX_EXP1', 'GET_INDEX_EXP2', 'GET_INDEX_EXP2',
+        'GET_INDEX_EXP3', 'GET_INDEX_EXP4', 'GET_INDEX_EXP5',
+        'GET_INDEX_EXP6', 'GET_INDEX_EXP7', 'GET_INDEX_EXP8',
+        'GET_INDEX_EXP9', 'GET_INDEX_EXP10', 'GET_INDEX_EXP11',
+        'GET_INDEX_EXP12', 'GET_INDEX_EXP13', 'GET_INDEX_EXP14',
+        'GET_INDEX_EXP15', 'NUM_ARGS_H1', 'NUM_ARGS',
+        'GET_INDEX_FUNC_H2', 'GET_INDEX_FUNC', 'GET_INDEX',
+        'INDEX', 'GET_ELEMENT', 'free_array', 'free_pointer',
+        'get_index', 'numpy_to_ndarray_strides',
+        'numpy_to_ndarray_shape', 'get_size', 'order_f', 'order_c', 'array_copy_data'])
+
+    def has_clash(self, name, symbols):
+        """
+        Indicate whether the proposed name causes any clashes.
+
+        Checks if a suggested name conflicts with predefined
+        keywords or specified symbols,returning true for a clash.
+        This method is crucial for maintaining namespace integrity and
+        preventing naming conflicts in code generation processes.
+
+        Parameters
+        ----------
+        name : str
+            The suggested name.
+        symbols : set
+            Symbols which should be considered as collisions.
+
+        Returns
+        -------
+        bool
+            True if the name is a collision.
+            False if the name is collision free.
+        """
+        return any(name == k for k in self.keywords) or \
+               any(name == s for s in symbols)
+
+    def get_collisionless_name(self, name, symbols):
+        """
+        Get a valid name which doesn't collision with symbols or Cuda keywords.
+
+        Find a new name based on the suggested name which will not cause
+        conflicts with Cuda keywords, does not appear in the provided symbols,
+        and is a valid name in Cuda code.
+
+        Parameters
+        ----------
+        name : str
+            The suggested name.
+        symbols : set
+            Symbols which should be considered as collisions.
+
+        Returns
+        -------
+        str
+            A new name which is collision free.
+        """
+        if len(name)>4 and all(name[i] == '_' for i in (0,1,-1,-2)):
+            # Ignore magic methods
+            return name
+        if name[0] == '_':
+            name = 'private'+name
+        return self._get_collisionless_name(name, symbols)
diff --git a/pyccel/stdlib/numpy/numpy_c.c b/pyccel/stdlib/numpy/numpy_c.c
index 7c9ecbbf6b..bc56214772 100644
--- a/pyccel/stdlib/numpy/numpy_c.c
+++ b/pyccel/stdlib/numpy/numpy_c.c
@@ -17,8 +17,10 @@ double  fsign(double x)
     return SIGN(x);
 }
 
+#ifndef __NVCC__
 /* numpy.sign for complex */
 double complex csign(double complex x)
 {
     return x ? ((!creal(x) && cimag(x) < 0) || (creal(x) < 0) ? -1 : 1) : 0;
 }
+#endif
diff --git a/pyccel/stdlib/numpy/numpy_c.h b/pyccel/stdlib/numpy/numpy_c.h
index e72cf3ad57..c2a16a5516 100644
--- a/pyccel/stdlib/numpy/numpy_c.h
+++ b/pyccel/stdlib/numpy/numpy_c.h
@@ -15,6 +15,8 @@
 
 long long int isign(long long int x);
 double fsign(double x);
+#ifndef __NVCC__
 double complex csign(double complex x);
+#endif
 
 #endif
diff --git a/pytest.ini b/pytest.ini
index 42eb0d72ba..3792ab65f9 100644
--- a/pytest.ini
+++ b/pytest.ini
@@ -9,3 +9,4 @@ markers =
     python: test to generate python code
     xdist_incompatible: test which compiles a file also compiled by another test
     external: test using an external dll (problematic with conda on Windows)
+    cuda: test to generate cuda code
diff --git a/tests/conftest.py b/tests/conftest.py
index 79144b6978..a5082ef6e8 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -21,6 +21,17 @@
 def language(request):
     return request.param
 
+@pytest.fixture( params=[
+        pytest.param("fortran", marks = pytest.mark.fortran),
+        pytest.param("c", marks = pytest.mark.c),
+        pytest.param("python", marks = pytest.mark.python),
+        pytest.param("cuda", marks = pytest.mark.cuda)
+    ],
+    scope = "session"
+)
+def language_with_cuda(request):
+    return request.param
+
 def move_coverage(path_dir):
     for root, _, files in os.walk(path_dir):
         for name in files:
diff --git a/tests/epyccel/test_base.py b/tests/epyccel/test_base.py
index c22064d321..413f79eef1 100644
--- a/tests/epyccel/test_base.py
+++ b/tests/epyccel/test_base.py
@@ -7,128 +7,128 @@
 from utilities import epyccel_test
 
 
-def test_is_false(language):
-    test = epyccel_test(base.is_false, lang=language)
+def test_is_false(language_with_cuda):
+    test = epyccel_test(base.is_false, lang=language_with_cuda)
     test.compare_epyccel( True )
     test.compare_epyccel( False )
 
-def test_is_true(language):
-    test = epyccel_test(base.is_true, lang=language)
+def test_is_true(language_with_cuda):
+    test = epyccel_test(base.is_true, lang=language_with_cuda)
     test.compare_epyccel( True )
     test.compare_epyccel( False )
 
-def test_compare_is(language):
-    test = epyccel_test(base.compare_is, lang=language)
+def test_compare_is(language_with_cuda):
+    test = epyccel_test(base.compare_is, lang=language_with_cuda)
     test.compare_epyccel( True, True )
     test.compare_epyccel( True, False )
     test.compare_epyccel( False, True )
     test.compare_epyccel( False, False )
 
-def test_compare_is_not(language):
-    test = epyccel_test(base.compare_is_not, lang=language)
+def test_compare_is_not(language_with_cuda):
+    test = epyccel_test(base.compare_is_not, lang=language_with_cuda)
     test.compare_epyccel( True, True )
     test.compare_epyccel( True, False )
     test.compare_epyccel( False, True )
     test.compare_epyccel( False, False )
 
-def test_compare_is_int(language):
-    test = epyccel_test(base.compare_is_int, lang=language)
+def test_compare_is_int(language_with_cuda):
+    test = epyccel_test(base.compare_is_int, lang=language_with_cuda)
     test.compare_epyccel( True, 1 )
     test.compare_epyccel( True, 0 )
     test.compare_epyccel( False, 1 )
     test.compare_epyccel( False, 0 )
 
-def test_compare_is_not_int(language):
-    test = epyccel_test(base.compare_is_not_int, lang=language)
+def test_compare_is_not_int(language_with_cuda):
+    test = epyccel_test(base.compare_is_not_int, lang=language_with_cuda)
     test.compare_epyccel( True, 1 )
     test.compare_epyccel( True, 0 )
     test.compare_epyccel( False, 1 )
     test.compare_epyccel( False, 0 )
 
-def test_not_false(language):
-    test = epyccel_test(base.not_false, lang=language)
+def test_not_false(language_with_cuda):
+    test = epyccel_test(base.not_false, lang=language_with_cuda)
     test.compare_epyccel( True )
     test.compare_epyccel( False )
 
-def test_not_true(language):
-    test = epyccel_test(base.not_true, lang=language)
+def test_not_true(language_with_cuda):
+    test = epyccel_test(base.not_true, lang=language_with_cuda)
     test.compare_epyccel( True )
     test.compare_epyccel( False )
 
-def test_eq_false(language):
-    test = epyccel_test(base.eq_false, lang=language)
+def test_eq_false(language_with_cuda):
+    test = epyccel_test(base.eq_false, lang=language_with_cuda)
     test.compare_epyccel( True )
     test.compare_epyccel( False )
 
-def test_eq_true(language):
-    test = epyccel_test(base.eq_true, lang=language)
+def test_eq_true(language_with_cuda):
+    test = epyccel_test(base.eq_true, lang=language_with_cuda)
     test.compare_epyccel( True )
     test.compare_epyccel( False )
 
-def test_neq_false(language):
-    test = epyccel_test(base.eq_false, lang=language)
+def test_neq_false(language_with_cuda):
+    test = epyccel_test(base.eq_false, lang=language_with_cuda)
     test.compare_epyccel( True )
     test.compare_epyccel( False )
 
-def test_neq_true(language):
-    test = epyccel_test(base.eq_true, lang=language)
+def test_neq_true(language_with_cuda):
+    test = epyccel_test(base.eq_true, lang=language_with_cuda)
     test.compare_epyccel( True )
     test.compare_epyccel( False )
 
-def test_not(language):
-    test = epyccel_test(base.not_val, lang=language)
+def test_not(language_with_cuda):
+    test = epyccel_test(base.not_val, lang=language_with_cuda)
     test.compare_epyccel( True )
     test.compare_epyccel( False )
 
-def test_not_int(language):
-    test = epyccel_test(base.not_int, lang=language)
+def test_not_int(language_with_cuda):
+    test = epyccel_test(base.not_int, lang=language_with_cuda)
     test.compare_epyccel( 0 )
     test.compare_epyccel( 4 )
 
-def test_compare_is_nil(language):
-    test = epyccel_test(base.is_nil, lang=language)
+def test_compare_is_nil(language_with_cuda):
+    test = epyccel_test(base.is_nil, lang=language_with_cuda)
     test.compare_epyccel( None )
 
-def test_compare_is_not_nil(language):
-    test = epyccel_test(base.is_not_nil, lang=language)
+def test_compare_is_not_nil(language_with_cuda):
+    test = epyccel_test(base.is_not_nil, lang=language_with_cuda)
     test.compare_epyccel( None )
 
-def test_cast_int(language):
-    test = epyccel_test(base.cast_int, lang=language)
+def test_cast_int(language_with_cuda):
+    test = epyccel_test(base.cast_int, lang=language_with_cuda)
     test.compare_epyccel( 4 )
-    test = epyccel_test(base.cast_float_to_int, lang=language)
+    test = epyccel_test(base.cast_float_to_int, lang=language_with_cuda)
     test.compare_epyccel( 4.5 )
 
-def test_cast_bool(language):
-    test = epyccel_test(base.cast_bool, lang=language)
+def test_cast_bool(language_with_cuda):
+    test = epyccel_test(base.cast_bool, lang=language_with_cuda)
     test.compare_epyccel( True )
 
-def test_cast_float(language):
-    test = epyccel_test(base.cast_float, lang=language)
+def test_cast_float(language_with_cuda):
+    test = epyccel_test(base.cast_float, lang=language_with_cuda)
     test.compare_epyccel( 4.5 )
-    test = epyccel_test(base.cast_int_to_float, lang=language)
+    test = epyccel_test(base.cast_int_to_float, lang=language_with_cuda)
     test.compare_epyccel( 4 )
 
-def test_if_0_int(language):
-    test = epyccel_test(base.if_0_int, lang=language)
+def test_if_0_int(language_with_cuda):
+    test = epyccel_test(base.if_0_int, lang=language_with_cuda)
     test.compare_epyccel( 22 )
     test.compare_epyccel( 0 )
 
-def test_if_0_real(language):
-    test = epyccel_test(base.if_0_real, lang=language)
+def test_if_0_real(language_with_cuda):
+    test = epyccel_test(base.if_0_real, lang=language_with_cuda)
     test.compare_epyccel( 22.3 )
     test.compare_epyccel( 0.0 )
 
-def test_same_int(language):
-    test = epyccel_test(base.is_same_int, lang=language)
+def test_same_int(language_with_cuda):
+    test = epyccel_test(base.is_same_int, lang=language_with_cuda)
     test.compare_epyccel( 22 )
-    test = epyccel_test(base.isnot_same_int, lang=language)
+    test = epyccel_test(base.isnot_same_int, lang=language_with_cuda)
     test.compare_epyccel( 22 )
 
-def test_same_float(language):
-    test = epyccel_test(base.is_same_float, lang=language)
+def test_same_float(language_with_cuda):
+    test = epyccel_test(base.is_same_float, lang=language_with_cuda)
     test.compare_epyccel( 22.2 )
-    test = epyccel_test(base.isnot_same_float, lang=language)
+    test = epyccel_test(base.isnot_same_float, lang=language_with_cuda)
     test.compare_epyccel( 22.2 )
 
 @pytest.mark.parametrize( 'language', [
@@ -150,28 +150,28 @@ def test_same_complex(language):
     test = epyccel_test(base.isnot_same_complex, lang=language)
     test.compare_epyccel( complex(2,3) )
 
-def test_is_types(language):
-    test = epyccel_test(base.is_types, lang=language)
+def test_is_types(language_with_cuda):
+    test = epyccel_test(base.is_types, lang=language_with_cuda)
     test.compare_epyccel( 1, 1.0 )
 
-def test_isnot_types(language):
-    test = epyccel_test(base.isnot_types, lang=language)
+def test_isnot_types(language_with_cuda):
+    test = epyccel_test(base.isnot_types, lang=language_with_cuda)
     test.compare_epyccel( 1, 1.0 )
 
-def test_none_is_none(language):
-    test = epyccel_test(base.none_is_none, lang=language)
+def test_none_is_none(language_with_cuda):
+    test = epyccel_test(base.none_is_none, lang=language_with_cuda)
     test.compare_epyccel()
 
-def test_none_isnot_none(language):
-    test = epyccel_test(base.none_isnot_none, lang=language)
+def test_none_isnot_none(language_with_cuda):
+    test = epyccel_test(base.none_isnot_none, lang=language_with_cuda)
     test.compare_epyccel()
 
-def test_pass_if(language):
-    test = epyccel_test(base.pass_if, lang=language)
+def test_pass_if(language_with_cuda):
+    test = epyccel_test(base.pass_if, lang=language_with_cuda)
     test.compare_epyccel(2)
 
-def test_pass2_if(language):
-    test = epyccel_test(base.pass2_if, lang=language)
+def test_pass2_if(language_with_cuda):
+    test = epyccel_test(base.pass2_if, lang=language_with_cuda)
     test.compare_epyccel(0.2)
     test.compare_epyccel(0.0)
 
@@ -192,15 +192,15 @@ def test_use_optional(language):
     test.compare_epyccel()
     test.compare_epyccel(6)
 
-def test_none_equality(language):
-    test = epyccel_test(base.none_equality, lang=language)
+def test_none_equality(language_with_cuda):
+    test = epyccel_test(base.none_equality, lang=language_with_cuda)
     test.compare_epyccel()
     test.compare_epyccel(6)
 
-def test_none_none_equality(language):
-    test = epyccel_test(base.none_none_equality, lang=language)
+def test_none_none_equality(language_with_cuda):
+    test = epyccel_test(base.none_none_equality, lang=language_with_cuda)
     test.compare_epyccel()
 
-def test_none_literal_equality(language):
-    test = epyccel_test(base.none_literal_equality, lang=language)
+def test_none_literal_equality(language_with_cuda):
+    test = epyccel_test(base.none_literal_equality, lang=language_with_cuda)
     test.compare_epyccel()

From 57b643e79dbbbccd582fe69967c9d95748db81a8 Mon Sep 17 00:00:00 2001
From: Said Mazouz <95222894+smazouz42@users.noreply.github.com>
Date: Wed, 15 May 2024 12:58:50 +0100
Subject: [PATCH 038/130] Fix import handling (#49)

This pull request fixes https://github.com/pyccel/pyccel-cuda/issues/48 by implementing a small wrapper for CUDA and by wrapping only the non-CUDA (C) functions in `extern "C"`.

**Commit Summary**

-    Implemented new header printer for CUDA.
-    Added CUDA wrapper assignment
-    Instead of wrapping all local headers, wrap only C functions with extern 'C'

---------

Co-authored-by: EmilyBourne <louise.bourne@gmail.com>
Co-authored-by: bauom <40796259+bauom@users.noreply.github.com>
---
 CHANGELOG.md                                |  3 +-
 pyccel/codegen/printing/cucode.py           | 45 ++++++++----
 pyccel/codegen/python_wrapper.py            |  4 ++
 pyccel/codegen/wrapper/cuda_to_c_wrapper.py | 78 +++++++++++++++++++++
 tests/epyccel/modules/cuda_module.py        | 13 ++++
 tests/epyccel/test_epyccel_modules.py       | 13 ++++
 6 files changed, 142 insertions(+), 14 deletions(-)
 create mode 100644 pyccel/codegen/wrapper/cuda_to_c_wrapper.py
 create mode 100644 tests/epyccel/modules/cuda_module.py

diff --git a/CHANGELOG.md b/CHANGELOG.md
index d6928b0eca..b897e14385 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,7 +5,8 @@ All notable changes to this project will be documented in this file.
 
 ### Added
 
--   #32 : add support for `nvcc` Compiler and `cuda` language as a possible option.
+-   #32 : Add support for `nvcc` Compiler and `cuda` language as a possible option.
+-   #48 : Fix incorrect handling of imports in `cuda`.
 
 ## \[UNRELEASED\]
 
diff --git a/pyccel/codegen/printing/cucode.py b/pyccel/codegen/printing/cucode.py
index 86146b065b..277d2a3a6a 100644
--- a/pyccel/codegen/printing/cucode.py
+++ b/pyccel/codegen/printing/cucode.py
@@ -52,19 +52,7 @@ def _print_Module(self, expr):
 
         # Print imports last to be sure that all additional_imports have been collected
         imports = [Import(expr.name, Module(expr.name,(),())), *self._additional_imports.values()]
-        c_headers_imports = ''
-        local_imports = ''
-
-        for imp in imports:
-            if imp.source in c_library_headers:
-                c_headers_imports += self._print(imp)
-            else:
-                local_imports += self._print(imp)
-
-        imports = f'{c_headers_imports}\
-                    extern "C"{{\n\
-                    {local_imports}\
-                    }}'
+        imports = ''.join(self._print(i) for i in imports)
 
         code = f'{imports}\n\
                  {global_variables}\n\
@@ -72,3 +60,34 @@ def _print_Module(self, expr):
 
         self.exit_scope()
         return code
+
+    def _print_ModuleHeader(self, expr):
+        self.set_scope(expr.module.scope)
+        self._in_header = True
+        name = expr.module.name
+
+        funcs = ""
+        cuda_headers = ""
+        for f in expr.module.funcs:
+            if not f.is_inline:
+                if 'kernel' in f.decorators:  # Checking for 'kernel' decorator
+                    cuda_headers += self.function_signature(f) + ';\n'
+                else:
+                    funcs += self.function_signature(f) + ';\n'
+        global_variables = ''.join('extern '+self._print(d) for d in expr.module.declarations if not d.variable.is_private)
+        # Print imports last to be sure that all additional_imports have been collected
+        imports = [*expr.module.imports, *self._additional_imports.values()]
+        imports = ''.join(self._print(i) for i in imports)
+
+        self._in_header = False
+        self.exit_scope()
+        function_declaration = f'{cuda_headers}\n\
+                    extern "C"{{\n\
+                    {funcs}\
+                    }}\n'
+        return '\n'.join((f"#ifndef {name.upper()}_H",
+                          f"#define {name.upper()}_H",
+                          global_variables,
+                          function_declaration,
+                          f"#endif // {name.upper()}_H\n"))
+
diff --git a/pyccel/codegen/python_wrapper.py b/pyccel/codegen/python_wrapper.py
index 9437727042..62c303fa64 100644
--- a/pyccel/codegen/python_wrapper.py
+++ b/pyccel/codegen/python_wrapper.py
@@ -13,6 +13,7 @@
 from pyccel.codegen.printing.fcode               import FCodePrinter
 from pyccel.codegen.wrapper.fortran_to_c_wrapper import FortranToCWrapper
 from pyccel.codegen.wrapper.c_to_python_wrapper  import CToPythonWrapper
+from pyccel.codegen.wrapper.cuda_to_c_wrapper    import CudaToCWrapper
 from pyccel.codegen.utilities                    import recompile_object
 from pyccel.codegen.utilities                    import copy_internal_library
 from pyccel.codegen.utilities                    import internal_libs
@@ -144,6 +145,9 @@ def create_shared_library(codegen,
                 verbose=verbose)
         timings['Bind C wrapping'] = time.time() - start_bind_c_compiling
         c_ast = bind_c_mod
+    elif language == 'cuda':
+        wrapper = CudaToCWrapper()
+        c_ast = wrapper.wrap(codegen.ast)
     else:
         c_ast = codegen.ast
 
diff --git a/pyccel/codegen/wrapper/cuda_to_c_wrapper.py b/pyccel/codegen/wrapper/cuda_to_c_wrapper.py
new file mode 100644
index 0000000000..c0e24c7c09
--- /dev/null
+++ b/pyccel/codegen/wrapper/cuda_to_c_wrapper.py
@@ -0,0 +1,78 @@
+# coding: utf-8
+#------------------------------------------------------------------------------------------#
+# This file is part of Pyccel which is released under MIT License. See the LICENSE file or #
+# go to https://github.com/pyccel/pyccel/blob/master/LICENSE for full license details.     #
+#------------------------------------------------------------------------------------------#
+"""
+Module describing the code-wrapping class : CudaToCWrapper
+which creates an interface exposing Cuda code to C.
+"""
+
+from pyccel.ast.bind_c      import BindCModule
+from pyccel.errors.errors   import Errors
+from pyccel.ast.bind_c      import BindCVariable
+from .wrapper               import Wrapper
+
+errors = Errors()
+
+class CudaToCWrapper(Wrapper):
+    """
+    Class for creating a wrapper exposing Cuda code to C.
+
+    While CUDA is typically compatible with C by default,
+    this wrapper becomes necessary in scenarios where specific adaptations
+    or modifications are required to ensure seamless integration with C.
+    """
+
+    def _wrap_Module(self, expr):
+        """
+        Create a Module which is compatible with C.
+
+        Create a Module which provides an interface between C and the
+        Module described by expr.
+
+        Parameters
+        ----------
+        expr : pyccel.ast.core.Module
+            The module to be wrapped.
+
+        Returns
+        -------
+        pyccel.ast.bind_c.BindCModule
+            The C-compatible module.
+        """
+        init_func = expr.init_func
+        if expr.interfaces:
+            errors.report("Interface wrapping is not yet supported for Cuda",
+                      severity='warning', symbol=expr)
+        if expr.classes:
+            errors.report("Class wrapping is not yet supported for Cuda",
+                      severity='warning', symbol=expr)
+
+        variables = [self._wrap(v) for v in expr.variables]
+
+        return BindCModule(expr.name, variables, expr.funcs,
+                init_func=init_func,
+                scope = expr.scope,
+                original_module=expr)
+
+    def _wrap_Variable(self, expr):
+        """
+        Create all objects necessary to expose a module variable to C.
+
+        Create and return the objects which must be printed in the wrapping
+        module in order to expose the variable to C.
+
+        Parameters
+        ----------
+        expr : pyccel.ast.variables.Variable
+            The module variable.
+
+        Returns
+        -------
+        pyccel.ast.bind_c.BindCVariable
+            The C-compatible variable, which must be printed in
+            the wrapping module to expose the variable.
+        """
+        return expr.clone(expr.name, new_class = BindCVariable)
+
diff --git a/tests/epyccel/modules/cuda_module.py b/tests/epyccel/modules/cuda_module.py
new file mode 100644
index 0000000000..bb7ae6b98a
--- /dev/null
+++ b/tests/epyccel/modules/cuda_module.py
@@ -0,0 +1,13 @@
+# pylint: disable=missing-function-docstring, missing-module-docstring
+import numpy as np
+
+g = np.float64(9.81)
+r0 = np.float32(1.0)
+rmin = 0.01
+rmax = 1.0
+
+skip_centre = True
+
+method = 3
+
+tiny = np.int32(4)
diff --git a/tests/epyccel/test_epyccel_modules.py b/tests/epyccel/test_epyccel_modules.py
index ad8ae0bd75..223f741bf0 100644
--- a/tests/epyccel/test_epyccel_modules.py
+++ b/tests/epyccel/test_epyccel_modules.py
@@ -200,3 +200,16 @@ def test_awkward_names(language):
     assert mod.function() == modnew.function()
     assert mod.pure() == modnew.pure()
     assert mod.allocate(1) == modnew.allocate(1)
+
+def test_cuda_module(language_with_cuda):
+    import modules.cuda_module as mod
+
+    modnew = epyccel(mod, language=language_with_cuda)
+
+    atts = ('g', 'r0', 'rmin', 'rmax', 'skip_centre',
+            'method', 'tiny')
+    for att in atts:
+        mod_att = getattr(mod, att)
+        modnew_att = getattr(modnew, att)
+        assert mod_att == modnew_att
+        assert type(mod_att) is type(modnew_att)

From af589a10e38815e4c0cce9b30e0f351818f419f4 Mon Sep 17 00:00:00 2001
From: Said Mazouz <95222894+smazouz42@users.noreply.github.com>
Date: Thu, 27 Jun 2024 20:31:46 +0100
Subject: [PATCH 039/130] Add support for kernels (#42)

This pull request addresses issue #28 by implementing a new feature in
Pyccel that allows users to define custom GPU kernels. The syntax for
creating these kernels is inspired by Numba. It also includes a fix for
issue #45, which is needed for testing purposes.

**Commit Summary**

- Introduced KernelCall class
- Added cuda printer methods _print_KernelCall and _print_FunctionDef to
generate the corresponding CUDA representation for both kernel calls and
definitions
- Added IndexedFunctionCall, which represents an indexed function call
- Added CUDA module and cuda.synchronize()
- Fixed a bug found in the generated header: it did not import the
headers needed by the functions it uses

---------

Co-authored-by: EmilyBourne <louise.bourne@gmail.com>
Co-authored-by: bauom <40796259+bauom@users.noreply.github.com>
Co-authored-by: Emily Bourne <emily.bourne@epfl.ch>
---
 .dict_custom.txt                              |   1 +
 CHANGELOG.md                                  |   2 +
 docs/cuda.md                                  |  23 +++
 pyccel/ast/core.py                            |  37 ++++
 pyccel/ast/cuda.py                            |  65 +++++++
 pyccel/ast/cudaext.py                         |  42 +++++
 pyccel/ast/utilities.py                       |   4 +-
 pyccel/codegen/printing/cucode.py             |  46 ++++-
 pyccel/cuda/__init__.py                       |  10 +
 pyccel/cuda/cuda_sync_primitives.py           |  16 ++
 pyccel/decorators.py                          |  32 ++++
 pyccel/errors/messages.py                     |   8 +
 pyccel/parser/semantic.py                     |  84 ++++++++-
 pyccel/parser/syntactic.py                    |   4 +
 tests/conftest.py                             |   9 +
 tests/cuda/test_kernel_semantic.py            | 176 ++++++++++++++++++
 tests/pyccel/scripts/kernel/hello_kernel.py   |  19 ++
 .../scripts/kernel/kernel_name_collision.py   |   8 +
 tests/pyccel/test_pyccel.py                   |  22 ++-
 19 files changed, 599 insertions(+), 9 deletions(-)
 create mode 100644 docs/cuda.md
 create mode 100644 pyccel/ast/cuda.py
 create mode 100644 pyccel/ast/cudaext.py
 create mode 100644 pyccel/cuda/__init__.py
 create mode 100644 pyccel/cuda/cuda_sync_primitives.py
 create mode 100644 tests/cuda/test_kernel_semantic.py
 create mode 100644 tests/pyccel/scripts/kernel/hello_kernel.py
 create mode 100644 tests/pyccel/scripts/kernel/kernel_name_collision.py

diff --git a/.dict_custom.txt b/.dict_custom.txt
index ae99f31ed4..5d99e21194 100644
--- a/.dict_custom.txt
+++ b/.dict_custom.txt
@@ -118,3 +118,4 @@ datatyping
 datatypes
 indexable
 traceback
+GPUs
diff --git a/CHANGELOG.md b/CHANGELOG.md
index b897e14385..717f638bf3 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,8 @@ All notable changes to this project will be documented in this file.
 
 -   #32 : Add support for `nvcc` Compiler and `cuda` language as a possible option.
 -   #48 : Fix incorrect handling of imports in `cuda`.
+-   #42 : Add support for custom kernel in`cuda`.
+-   #42 : Add Cuda module to Pyccel. Add support for `cuda.synchronize` function.
 
 ## \[UNRELEASED\]
 
diff --git a/docs/cuda.md b/docs/cuda.md
new file mode 100644
index 0000000000..de30d52b80
--- /dev/null
+++ b/docs/cuda.md
@@ -0,0 +1,23 @@
+# Getting started with GPU
+
+Pyccel now supports NVIDIA CUDA, empowering users to accelerate numerical computations on GPUs seamlessly. With Pyccel's high-level syntax and automatic code generation, harnessing the power of CUDA becomes effortless. This documentation provides a quick guide to enabling CUDA in Pyccel.
+
+## Cuda Decorator
+
+### kernel
+
+The kernel decorator allows the user to declare a CUDA kernel. The kernel can be defined in Python, and the syntax is similar to that of Numba.
+
+```python
+from pyccel.decorators import kernel
+
+@kernel
+def my_kernel():
+    pass
+
+blockspergrid = 1
+threadsperblock = 1
+# Call your kernel function
+my_kernel[blockspergrid, threadsperblock]()
+
+```
\ No newline at end of file
diff --git a/pyccel/ast/core.py b/pyccel/ast/core.py
index 013f206dd6..f0e5cc67f1 100644
--- a/pyccel/ast/core.py
+++ b/pyccel/ast/core.py
@@ -73,6 +73,7 @@
     'If',
     'IfSection',
     'Import',
+    'IndexedFunctionCall',
     'InProgram',
     'InlineFunctionDef',
     'Interface',
@@ -2065,6 +2066,42 @@ def _ignore(cls, c):
         """
         return c is None or isinstance(c, (FunctionDef, *cls._ignored_types))
 
+class IndexedFunctionCall(FunctionCall):
+    """
+    Represents an indexed function call in the code.
+
+    Class representing indexed function calls, encapsulating all
+    relevant information for such calls within the code base.
+
+    Parameters
+    ----------
+    func : FunctionDef
+        The function being called.
+
+    args : iterable of FunctionCallArgument
+        The arguments passed to the function.
+
+    indexes : iterable of TypedAstNode
+        The indexes of the function call.
+
+    current_function : FunctionDef, optional
+        The function where the call takes place.
+    """
+    __slots__ = ('_indexes',)
+    _attribute_nodes = FunctionCall._attribute_nodes + ('_indexes',)
+    def __init__(self, func, args, indexes, current_function = None):
+        self._indexes = indexes
+        super().__init__(func, args, current_function)
+
+    @property
+    def indexes(self):
+        """
+        Indexes of function call.
+
+        Represents the indexes of the function call
+        """
+        return self._indexes
+
 class ConstructorCall(FunctionCall):
 
     """
diff --git a/pyccel/ast/cuda.py b/pyccel/ast/cuda.py
new file mode 100644
index 0000000000..f1e50ef7f0
--- /dev/null
+++ b/pyccel/ast/cuda.py
@@ -0,0 +1,65 @@
+# -*- coding: utf-8 -*-
+#------------------------------------------------------------------------------------------#
+# This file is part of Pyccel which is released under MIT License. See the LICENSE file or #
+# go to https://github.com/pyccel/pyccel/blob/master/LICENSE for full license details.     #
+#------------------------------------------------------------------------------------------#
+"""
+CUDA Module
+This module provides a collection of classes and utilities for CUDA programming.
+"""
+from pyccel.ast.core import FunctionCall
+
+__all__ = (
+    'KernelCall',
+)
+
+class KernelCall(FunctionCall):
+    """
+    Represents a kernel function call in the code.
+
+    The class serves as a representation of a kernel
+    function call within the codebase.
+
+    Parameters
+    ----------
+    func : FunctionDef
+        The definition of the function being called.
+
+    args : iterable of FunctionCallArgument
+        The arguments passed to the function.
+
+    num_blocks : TypedAstNode
+        The number of blocks. These objects must have a primitive type of `PrimitiveIntegerType`.
+
+    tp_block : TypedAstNode
+        The number of threads per block. These objects must have a primitive type of `PrimitiveIntegerType`.
+
+    current_function : FunctionDef, optional
+        The function where the call takes place.
+    """
+    __slots__ = ('_num_blocks','_tp_block')
+    _attribute_nodes = (*FunctionCall._attribute_nodes, '_num_blocks', '_tp_block')
+
+    def __init__(self, func, args, num_blocks, tp_block, current_function = None):
+        self._num_blocks = num_blocks
+        self._tp_block = tp_block
+        super().__init__(func, args, current_function)
+
+    @property
+    def num_blocks(self):
+        """
+        The number of blocks in the kernel being called.
+
+        The number of blocks in the kernel being called.
+        """
+        return self._num_blocks
+
+    @property
+    def tp_block(self):
+        """
+        The number of threads per block.
+
+        The number of threads per block.
+        """
+        return self._tp_block
+
diff --git a/pyccel/ast/cudaext.py b/pyccel/ast/cudaext.py
new file mode 100644
index 0000000000..b540f20993
--- /dev/null
+++ b/pyccel/ast/cudaext.py
@@ -0,0 +1,42 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+#------------------------------------------------------------------------------------------#
+# This file is part of Pyccel which is released under MIT License. See the LICENSE file or #
+# go to https://github.com/pyccel/pyccel/blob/master/LICENSE for full license details.     #
+#------------------------------------------------------------------------------------------#
+"""
+CUDA Extension Module
+Provides CUDA functionality for code generation.
+"""
+from .internals      import PyccelFunction
+
+from .datatypes      import VoidType
+from .core           import Module, PyccelFunctionDef
+
+__all__ = (
+    'CudaSynchronize',
+)
+
+class CudaSynchronize(PyccelFunction):
+    """
+    Represents a call to Cuda.synchronize for code generation.
+
+    This class serves as a representation of the Cuda.synchronize method.
+    """
+    __slots__ = ()
+    _attribute_nodes = ()
+    _shape     = None
+    _class_type = VoidType()
+    def __init__(self):
+        super().__init__()
+
+cuda_funcs = {
+    'synchronize'       : PyccelFunctionDef('synchronize' , CudaSynchronize),
+}
+
+cuda_mod = Module('cuda',
+    variables=[],
+    funcs=cuda_funcs.values(),
+    imports=[]
+)
+
diff --git a/pyccel/ast/utilities.py b/pyccel/ast/utilities.py
index 1e6c0422ab..e5cd77b168 100644
--- a/pyccel/ast/utilities.py
+++ b/pyccel/ast/utilities.py
@@ -25,6 +25,7 @@
 from .literals      import LiteralInteger, LiteralEllipsis, Nil
 from .mathext       import math_mod
 from .sysext        import sys_mod
+from .cudaext       import cuda_mod
 
 from .numpyext      import (NumpyEmpty, NumpyArray, numpy_mod,
                             NumpyTranspose, NumpyLinspace)
@@ -49,7 +50,8 @@
 decorators_mod = Module('decorators',(),
         funcs = [PyccelFunctionDef(d, PyccelFunction) for d in pyccel_decorators.__all__])
 pyccel_mod = Module('pyccel',(),(),
-        imports = [Import('decorators', decorators_mod)])
+        imports = [Import('decorators', decorators_mod),
+                    Import('cuda', cuda_mod)])
 
 # TODO add documentation
 builtin_import_registry = Module('__main__',
diff --git a/pyccel/codegen/printing/cucode.py b/pyccel/codegen/printing/cucode.py
index 277d2a3a6a..cd26843017 100644
--- a/pyccel/codegen/printing/cucode.py
+++ b/pyccel/codegen/printing/cucode.py
@@ -9,11 +9,12 @@
 enabling the direct translation of high-level Pyccel expressions into CUDA code.
 """
 
-from pyccel.codegen.printing.ccode import CCodePrinter, c_library_headers
+from pyccel.codegen.printing.ccode  import CCodePrinter
 
-from pyccel.ast.core        import Import, Module
+from pyccel.ast.core                import Import, Module
+from pyccel.ast.literals            import Nil
 
-from pyccel.errors.errors   import Errors
+from pyccel.errors.errors           import Errors
 
 
 errors = Errors()
@@ -61,6 +62,44 @@ def _print_Module(self, expr):
         self.exit_scope()
         return code
 
+    def function_signature(self, expr, print_arg_names = True):
+        """
+        Get the Cuda representation of the function signature.
+
+        Extract from the function definition `expr` all the
+        information (name, input, output) needed to create the
+        function signature and return a string describing the
+        function.
+        This is not a declaration as the signature does not end
+        with a semi-colon.
+
+        Parameters
+        ----------
+        expr : FunctionDef
+            The function definition for which a signature is needed.
+
+        print_arg_names : bool, default : True
+            Indicates whether argument names should be printed.
+
+        Returns
+        -------
+        str
+            Signature of the function.
+        """
+        cuda_decorater = '__global__' if 'kernel' in expr.decorators else ''
+        c_function_signature = super().function_signature(expr, print_arg_names)
+        return f'{cuda_decorater} {c_function_signature}'
+
+    def _print_KernelCall(self, expr):
+        func = expr.funcdef
+        args = [a.value or Nil() for a in expr.args]
+
+        args = ', '.join(self._print(a) for a in args)
+        return f"{func.name}<<<{expr.num_blocks}, {expr.tp_block}>>>({args});\n"
+
+    def _print_CudaSynchronize(self, expr):
+        return 'cudaDeviceSynchronize();\n'
+
     def _print_ModuleHeader(self, expr):
         self.set_scope(expr.module.scope)
         self._in_header = True
@@ -87,6 +126,7 @@ def _print_ModuleHeader(self, expr):
                     }}\n'
         return '\n'.join((f"#ifndef {name.upper()}_H",
                           f"#define {name.upper()}_H",
+                          imports,
                           global_variables,
                           function_declaration,
-                          "#endif // {name.upper()}_H\n"))
+                          f"#endif // {name.upper()}_H\n"))
diff --git a/pyccel/cuda/__init__.py b/pyccel/cuda/__init__.py
new file mode 100644
index 0000000000..e8542ad5d5
--- /dev/null
+++ b/pyccel/cuda/__init__.py
@@ -0,0 +1,10 @@
+#------------------------------------------------------------------------------------------#
+# This file is part of Pyccel which is released under MIT License. See the LICENSE file or #
+# go to https://github.com/pyccel/pyccel/blob/master/LICENSE for full license details.     #
+#------------------------------------------------------------------------------------------#
+"""
+    This module is for exposing the CudaSubmodule functions.
+"""
+from .cuda_sync_primitives    import synchronize
+
+__all__ = ['synchronize']
diff --git a/pyccel/cuda/cuda_sync_primitives.py b/pyccel/cuda/cuda_sync_primitives.py
new file mode 100644
index 0000000000..f3442fe9e2
--- /dev/null
+++ b/pyccel/cuda/cuda_sync_primitives.py
@@ -0,0 +1,16 @@
+#------------------------------------------------------------------------------------------#
+# This file is part of Pyccel which is released under MIT License. See the LICENSE file or #
+# go to https://github.com/pyccel/pyccel/blob/master/LICENSE for full license details.     #
+#------------------------------------------------------------------------------------------#
+"""
+This submodule contains CUDA methods for Pyccel.
+"""
+
+
+def synchronize():
+    """
+    Synchronize CUDA device execution.
+
+    Synchronize CUDA device execution.
+    """
+
diff --git a/pyccel/decorators.py b/pyccel/decorators.py
index 1f640043db..77717a991f 100644
--- a/pyccel/decorators.py
+++ b/pyccel/decorators.py
@@ -19,6 +19,7 @@
     'sympy',
     'template',
     'types',
+    'kernel'
 )
 
 
@@ -109,3 +110,34 @@ def allow_negative_index(f,*args):
     def identity(f):
         return f
     return identity
+
+def kernel(f):
+    """
+    Decorator for marking a Python function as a kernel.
+
+    This function serves as a decorator to mark a Python function
+    as a kernel function, typically used for GPU computations.
+    This allows the function to be indexed with the number of blocks and threads.
+
+    Parameters
+    ----------
+    f : function
+        The function to which the decorator is applied.
+
+    Returns
+    -------
+    KernelAccessor
+        An object wrapping the kernel; indexing it returns the function.
+    """
+    class KernelAccessor:
+        """
+        Class representing the kernel function.
+
+        Indexing an instance (with any key) returns the wrapped function.
+        """
+        def __init__(self, f):
+            self._f = f
+        def __getitem__(self, args):
+            return self._f
+
+    return KernelAccessor(f)
diff --git a/pyccel/errors/messages.py b/pyccel/errors/messages.py
index 79eccc1df2..09966d810c 100644
--- a/pyccel/errors/messages.py
+++ b/pyccel/errors/messages.py
@@ -162,3 +162,11 @@
 WRONG_LINSPACE_ENDPOINT = 'endpoint argument must be boolean'
 NON_LITERAL_KEEP_DIMS = 'keep_dims argument must be a literal, otherwise rank is unknown'
 NON_LITERAL_AXIS = 'axis argument must be a literal, otherwise pyccel cannot determine which dimension to operate on'
+MISSING_KERNEL_CONFIGURATION = 'Kernel launch configuration not specified'
+INVALID_KERNEL_LAUNCH_CONFIG = 'Expected exactly 2 parameters for kernel launch'
+INVALID_KERNEL_CALL_BP_GRID = 'Invalid Block per grid parameter for Kernel call'
+INVALID_KERNEL_CALL_TP_BLOCK = 'Invalid Thread per Block parameter for Kernel call'
+
+
+
+
diff --git a/pyccel/parser/semantic.py b/pyccel/parser/semantic.py
index e94b9c8413..fde10d6317 100644
--- a/pyccel/parser/semantic.py
+++ b/pyccel/parser/semantic.py
@@ -116,6 +116,8 @@
 from pyccel.ast.variable import IndexedElement, AnnotatedPyccelSymbol
 from pyccel.ast.variable import DottedName, DottedVariable
 
+from pyccel.ast.cuda import     KernelCall
+
 from pyccel.errors.errors import Errors
 from pyccel.errors.errors import PyccelSemanticError
 
@@ -133,7 +135,9 @@
         PYCCEL_RESTRICTION_LIST_COMPREHENSION_LIMITS, PYCCEL_RESTRICTION_LIST_COMPREHENSION_SIZE,
         UNUSED_DECORATORS, UNSUPPORTED_POINTER_RETURN_VALUE, PYCCEL_RESTRICTION_OPTIONAL_NONE,
         PYCCEL_RESTRICTION_PRIMITIVE_IMMUTABLE, PYCCEL_RESTRICTION_IS_ISNOT,
-        FOUND_DUPLICATED_IMPORT, UNDEFINED_WITH_ACCESS, MACRO_MISSING_HEADER_OR_FUNC)
+        FOUND_DUPLICATED_IMPORT, UNDEFINED_WITH_ACCESS, MACRO_MISSING_HEADER_OR_FUNC, PYCCEL_RESTRICTION_INHOMOG_SET,
+        MISSING_KERNEL_CONFIGURATION,
+        INVALID_KERNEL_LAUNCH_CONFIG, INVALID_KERNEL_CALL_BP_GRID, INVALID_KERNEL_CALL_TP_BLOCK)
 
 from pyccel.parser.base      import BasicParser
 from pyccel.parser.syntactic import SyntaxParser
@@ -1139,6 +1143,67 @@ def _handle_function(self, expr, func, args, *, is_method = False, use_build_fun
 
             return new_expr
 
+    def _handle_kernel(self, expr, func, args):
+        """
+        Create the node representing the kernel function call.
+
+        Check that exactly two launch parameters are given, that the kernel
+        returns nothing and that the argument count matches, then build a KernelCall.
+
+        Parameters
+        ----------
+        expr : IndexedFunctionCall
+               Node holding all the information about the indexed function call.
+
+        func : FunctionDef | Interface | PyccelInternalFunction type
+               The function being called.
+
+        args : iterable of FunctionCallArgument
+               The arguments passed to the function.
+
+        Returns
+        -------
+        pyccel.ast.cuda.KernelCall
+            The semantic representation of the kernel call.
+        """
+        if len(expr.indexes) != 2:
+            errors.report(INVALID_KERNEL_LAUNCH_CONFIG,
+                    symbol=expr,
+                    severity='fatal')
+        if len(func.results):
+            errors.report(f"cuda kernel function '{func.name}' returned a value in violation of the laid-down specification",
+                         symbol=expr,
+                         severity='fatal')
+        if isinstance(func, FunctionDef) and len(args) != len(func.arguments):
+            errors.report(f"{len(args)} argument types given, but function takes {len(func.arguments)} arguments",
+                symbol=expr,
+                severity='fatal')
+        if not isinstance(expr.indexes[0], (LiteralInteger)):
+            if isinstance(expr.indexes[0], PyccelSymbol):
+                num_blocks = self.get_variable(expr.indexes[0])
+
+                if not isinstance(num_blocks.dtype, PythonNativeInt):
+                    errors.report(INVALID_KERNEL_CALL_BP_GRID,
+                    symbol = expr,
+                    severity='fatal')
+            else:
+                errors.report(INVALID_KERNEL_CALL_BP_GRID,
+                    symbol = expr,
+                    severity='fatal')
+        if not isinstance(expr.indexes[1], (LiteralInteger)):
+            if isinstance(expr.indexes[1], PyccelSymbol):
+                tp_block = self.get_variable(expr.indexes[1])
+                if not isinstance(tp_block.dtype, PythonNativeInt):
+                    errors.report(INVALID_KERNEL_CALL_TP_BLOCK,
+                    symbol = expr,
+                    severity='fatal')
+            else:
+                errors.report(INVALID_KERNEL_CALL_TP_BLOCK,
+                    symbol = expr,
+                    severity='fatal')
+        new_expr = KernelCall(func, args, expr.indexes[0], expr.indexes[1])
+        return new_expr
+
     def _sort_function_call_args(self, func_args, args):
         """
         Sort and add the missing call arguments to match the arguments in the function definition.
@@ -2815,6 +2880,23 @@ def _visit_Lambda(self, expr):
                 expr = Lambda(tuple(expr.variables), expr_new)
         return expr
 
+    def _visit_IndexedFunctionCall(self, expr):
+        name     = expr.funcdef
+        name = self.scope.get_expected_name(name)
+        func     = self.scope.find(name, 'functions')
+        args = self._handle_function_args(expr.args)
+
+        if func is None:
+            return errors.report(UNDEFINED_FUNCTION, symbol=expr.funcdef,
+                    bounding_box=(self.current_ast_node.lineno, self.current_ast_node.col_offset),
+                    severity='fatal')
+
+        func = self._annotate_the_called_function_def(func)
+        if 'kernel' in func.decorators :
+            return self._handle_kernel(expr, func, args)
+        else:
+            return errors.report("Unknown function type",
+                symbol=expr, severity='fatal')
     def _visit_FunctionCall(self, expr):
         name     = expr.funcdef
         try:
diff --git a/pyccel/parser/syntactic.py b/pyccel/parser/syntactic.py
index 2967f4999b..3af7f0728a 100644
--- a/pyccel/parser/syntactic.py
+++ b/pyccel/parser/syntactic.py
@@ -64,6 +64,8 @@
 
 from pyccel.ast.type_annotations import SyntacticTypeAnnotation, UnionTypeAnnotation
 
+from pyccel.ast.core import IndexedFunctionCall
+
 from pyccel.parser.base        import BasicParser
 from pyccel.parser.extend_tree import extend_tree
 from pyccel.parser.utilities   import get_default_path
@@ -1102,6 +1104,8 @@ def _visit_Call(self, stmt):
         elif isinstance(func, DottedName):
             func_attr = FunctionCall(func.name[-1], args)
             func = DottedName(*func.name[:-1], func_attr)
+        elif isinstance(func,IndexedElement):
+            func = IndexedFunctionCall(func.base, args, func.indices)
         else:
             raise NotImplementedError(f' Unknown function type {type(func)}')
 
diff --git a/tests/conftest.py b/tests/conftest.py
index a5082ef6e8..4e74d1ec7a 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -59,6 +59,15 @@ def pytest_runtest_teardown(item, nextitem):
 
 def pytest_addoption(parser):
     parser.addoption("--developer-mode", action="store_true", default=github_debugging, help="Show tracebacks when pyccel errors are raised")
+    parser.addoption("--gpu_available", action="store_true",
+                default=False, help="enable GPU tests")
+
+def pytest_generate_tests(metafunc):
+    if "gpu_available" in metafunc.fixturenames:
+        if metafunc.config.getoption("gpu_available"):
+            metafunc.parametrize("gpu_available", [True])
+        else:
+            metafunc.parametrize("gpu_available", [False])
 
 def pytest_sessionstart(session):
     # setup_stuff
diff --git a/tests/cuda/test_kernel_semantic.py b/tests/cuda/test_kernel_semantic.py
new file mode 100644
index 0000000000..00b74c3bea
--- /dev/null
+++ b/tests/cuda/test_kernel_semantic.py
@@ -0,0 +1,176 @@
+# pylint: disable=missing-function-docstring, missing-module-docstring
+import pytest
+
+from pyccel import epyccel
+from pyccel.decorators import kernel
+from pyccel.errors.errors import Errors, PyccelSemanticError
+from pyccel.errors.messages import (INVALID_KERNEL_CALL_TP_BLOCK,
+                                    INVALID_KERNEL_CALL_BP_GRID,
+                                    INVALID_KERNEL_LAUNCH_CONFIG)
+
+
+@pytest.mark.cuda
+def test_invalid_block_number():
+    def invalid_block_number():
+        @kernel
+        def kernel_call():
+            pass
+
+        blocks_per_grid = 1.0
+        threads_per_block = 1
+        kernel_call[blocks_per_grid, threads_per_block]()
+
+    errors = Errors()
+
+    with pytest.raises(PyccelSemanticError):
+        epyccel(invalid_block_number, language="cuda")
+
+    assert errors.has_errors()
+
+    assert errors.num_messages() == 1
+
+    error_info = [*errors.error_info_map.values()][0][0]
+    assert error_info.symbol.funcdef == 'kernel_call'
+    assert INVALID_KERNEL_CALL_BP_GRID == error_info.message
+
+
+@pytest.mark.cuda
+def test_invalid_thread_per_block():
+    def invalid_thread_per_block():
+        @kernel
+        def kernel_call():
+            pass
+
+        blocks_per_grid = 1
+        threads_per_block = 1.0
+        kernel_call[blocks_per_grid, threads_per_block]()
+
+    errors = Errors()
+
+    with pytest.raises(PyccelSemanticError):
+        epyccel(invalid_thread_per_block, language="cuda")
+    assert errors.has_errors()
+    assert errors.num_messages() == 1
+    error_info = [*errors.error_info_map.values()][0][0]
+    assert error_info.symbol.funcdef == 'kernel_call'
+    assert INVALID_KERNEL_CALL_TP_BLOCK == error_info.message
+
+
+@pytest.mark.cuda
+def test_invalid_launch_config_high():
+    def invalid_launch_config_high():
+        @kernel
+        def kernel_call():
+            pass
+
+        blocks_per_grid = 1
+        threads_per_block = 1
+        third_param = 1
+        kernel_call[blocks_per_grid, threads_per_block, third_param]()
+
+    errors = Errors()
+
+    with pytest.raises(PyccelSemanticError):
+        epyccel(invalid_launch_config_high, language="cuda")
+
+    assert errors.has_errors()
+    assert errors.num_messages() == 1
+
+    error_info = [*errors.error_info_map.values()][0][0]
+    assert error_info.symbol.funcdef == 'kernel_call'
+    assert INVALID_KERNEL_LAUNCH_CONFIG == error_info.message
+
+
+@pytest.mark.cuda
+def test_invalid_launch_config_low():
+    def invalid_launch_config_low():
+        @kernel
+        def kernel_call():
+            pass
+
+        blocks_per_grid = 1
+        kernel_call[blocks_per_grid]()
+
+    errors = Errors()
+
+    with pytest.raises(PyccelSemanticError):
+        epyccel(invalid_launch_config_low, language="cuda")
+
+    assert errors.has_errors()
+    assert errors.num_messages() == 1
+
+    error_info = [*errors.error_info_map.values()][0][0]
+    assert error_info.symbol.funcdef == 'kernel_call'
+    assert INVALID_KERNEL_LAUNCH_CONFIG == error_info.message
+
+
+@pytest.mark.cuda
+def test_invalid_arguments_for_kernel_call():
+    def invalid_arguments():
+        @kernel
+        def kernel_call(arg : int):
+            pass
+
+        blocks_per_grid = 1
+        threads_per_block = 1
+        kernel_call[blocks_per_grid, threads_per_block]()
+
+    errors = Errors()
+
+    with pytest.raises(PyccelSemanticError):
+        epyccel(invalid_arguments, language="cuda")
+
+    assert errors.has_errors()
+    assert errors.num_messages() == 1
+
+    error_info = [*errors.error_info_map.values()][0][0]
+    assert error_info.symbol.funcdef == 'kernel_call'
+    assert "0 argument types given, but function takes 1 arguments" == error_info.message
+
+
+@pytest.mark.cuda
+def test_invalid_arguments_for_kernel_call_2():
+    def invalid_arguments_():
+        @kernel
+        def kernel_call():
+            pass
+
+        blocks_per_grid = 1
+        threads_per_block = 1
+        kernel_call[blocks_per_grid, threads_per_block](1)
+
+    errors = Errors()
+
+    with pytest.raises(PyccelSemanticError):
+        epyccel(invalid_arguments_, language="cuda")
+
+    assert errors.has_errors()
+    assert errors.num_messages() == 1
+
+    error_info = [*errors.error_info_map.values()][0][0]
+    assert error_info.symbol.funcdef == 'kernel_call'
+    assert "1 argument types given, but function takes 0 arguments" == error_info.message
+
+
+@pytest.mark.cuda
+def test_kernel_return():
+    def kernel_return():
+        @kernel
+        def kernel_call():
+            return 7
+
+        blocks_per_grid = 1
+        threads_per_block = 1
+        kernel_call[blocks_per_grid, threads_per_block]()
+
+    errors = Errors()
+
+    with pytest.raises(PyccelSemanticError):
+        epyccel(kernel_return, language="cuda")
+
+    assert errors.has_errors()
+    assert errors.num_messages() == 1
+
+    error_info = [*errors.error_info_map.values()][0][0]
+    assert error_info.symbol.funcdef == 'kernel_call'
+    assert "cuda kernel function 'kernel_call' returned a value in violation of the laid-down specification" == error_info.message
diff --git a/tests/pyccel/scripts/kernel/hello_kernel.py b/tests/pyccel/scripts/kernel/hello_kernel.py
new file mode 100644
index 0000000000..b6901b25a1
--- /dev/null
+++ b/tests/pyccel/scripts/kernel/hello_kernel.py
@@ -0,0 +1,19 @@
+# pylint: disable=missing-function-docstring, missing-module-docstring
+from pyccel.decorators import kernel
+from pyccel import cuda
+
+@kernel
+def say_hello(its_morning : bool):
+    if(its_morning):
+        print("Hello and Good morning")
+    else:
+        print("Hello and Good afternoon")
+
+def f():
+    its_morning = True
+    say_hello[1,1](its_morning)
+    cuda.synchronize()
+
+if __name__ == '__main__':
+    f()
+
diff --git a/tests/pyccel/scripts/kernel/kernel_name_collision.py b/tests/pyccel/scripts/kernel/kernel_name_collision.py
new file mode 100644
index 0000000000..ac7abe25ae
--- /dev/null
+++ b/tests/pyccel/scripts/kernel/kernel_name_collision.py
@@ -0,0 +1,8 @@
+# pylint: disable=missing-function-docstring, missing-module-docstring
+from pyccel.decorators import kernel
+
+@kernel
+def do():
+    pass
+
+do[1,1]()
diff --git a/tests/pyccel/test_pyccel.py b/tests/pyccel/test_pyccel.py
index ec1e846549..b4757a3c31 100644
--- a/tests/pyccel/test_pyccel.py
+++ b/tests/pyccel/test_pyccel.py
@@ -294,7 +294,7 @@ def compare_pyth_fort_output( p_output, f_output, dtype=float, language=None):
 #------------------------------------------------------------------------------
 def pyccel_test(test_file, dependencies = None, compile_with_pyccel = True,
         cwd = None, pyccel_commands = "", output_dtype = float,
-        language = None, output_dir = None):
+        language = None, output_dir = None, execute_code = True):
     """
     Run pyccel and compare the output to ensure that the results
     are equivalent
@@ -394,13 +394,14 @@ def pyccel_test(test_file, dependencies = None, compile_with_pyccel = True,
             compile_fortran(cwd, output_test_file, dependencies)
         elif language == 'c':
             compile_c(cwd, output_test_file, dependencies)
-
-    lang_out = get_lang_output(output_test_file, language)
-    compare_pyth_fort_output(pyth_out, lang_out, output_dtype, language)
+    if  execute_code:
+        lang_out = get_lang_output(output_test_file, language)
+        compare_pyth_fort_output(pyth_out, lang_out, output_dtype, language)
 
 #==============================================================================
 # UNIT TESTS
 #==============================================================================
+
 def test_relative_imports_in_project(language):
 
     base_dir = os.path.dirname(os.path.realpath(__file__))
@@ -728,6 +729,19 @@ def test_multiple_results(language):
 def test_elemental(language):
     pyccel_test("scripts/decorators_elemental.py", language = language)
 
+#------------------------------------------------------------------------------
+@pytest.mark.cuda
+def test_hello_kernel(gpu_available):
+    types = str
+    pyccel_test("scripts/kernel/hello_kernel.py",
+            language="cuda", output_dtype=types , execute_code=gpu_available)
+
+#------------------------------------------------------------------------------
+@pytest.mark.cuda
+def test_kernel_collision(gpu_available):
+    pyccel_test("scripts/kernel/kernel_name_collision.py",
+            language="cuda", execute_code=gpu_available)
+
 #------------------------------------------------------------------------------
 def test_print_strings(language):
     types = str

From 91d610129ea8a4e3f695281cf6891c2691f3b79a Mon Sep 17 00:00:00 2001
From: Said Mazouz <95222894+smazouz42@users.noreply.github.com>
Date: Wed, 3 Jul 2024 17:37:02 +0100
Subject: [PATCH 040/130] Updated CUDA Name Clash Checker By Adding
 CUDA-specific keywords (#60)

This pull request addresses issue #59 by adding more CUDA-specific
keywords to enhance the checking of variable/function names and prevent
name clashes.

---------

Co-authored-by: EmilyBourne <louise.bourne@gmail.com>
Co-authored-by: bauom <40796259+bauom@users.noreply.github.com>
---
 CHANGELOG.md                              |  1 +
 pyccel/naming/cudanameclashchecker.py     | 36 ++++++++++++++++++++++-
 pyccel/naming/languagenameclashchecker.py |  5 ++++
 3 files changed, 41 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 717f638bf3..afdabc3ab7 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,7 @@ All notable changes to this project will be documented in this file.
 
 -   #32 : Add support for `nvcc` Compiler and `cuda` language as a possible option.
 -   #48 : Fix incorrect handling of imports in `cuda`.
+-   #59 : Updated `cuda` clash checker.
 -   #42 : Add support for custom kernel in`cuda`.
 -   #42 : Add Cuda module to Pyccel. Add support for `cuda.synchronize` function.
 
diff --git a/pyccel/naming/cudanameclashchecker.py b/pyccel/naming/cudanameclashchecker.py
index 971204e912..c7aaa4952f 100644
--- a/pyccel/naming/cudanameclashchecker.py
+++ b/pyccel/naming/cudanameclashchecker.py
@@ -16,6 +16,7 @@ class CudaNameClashChecker(LanguageNameClashChecker):
     verify that they do not cause name clashes. Name clashes may be due to
     new variables, or due to the use of reserved keywords.
     """
+
     # Keywords as mentioned on https://en.cppreference.com/w/c/keyword
     keywords = set(['isign', 'fsign', 'csign', 'auto', 'break', 'case', 'char', 'const',
         'continue', 'default', 'do', 'double', 'else', 'enum',
@@ -37,7 +38,40 @@ class CudaNameClashChecker(LanguageNameClashChecker):
         'GET_INDEX_FUNC_H2', 'GET_INDEX_FUNC', 'GET_INDEX',
         'INDEX', 'GET_ELEMENT', 'free_array', 'free_pointer',
         'get_index', 'numpy_to_ndarray_strides',
-        'numpy_to_ndarray_shape', 'get_size', 'order_f', 'order_c', 'array_copy_data'])
+        'numpy_to_ndarray_shape', 'get_size', 'order_f', 'order_c', 'array_copy_data',
+        '__global__', '__device__', '__host__','__constant__', '__shared__',
+        '__managed__','threadIdx', 'blockIdx', 'blockDim', 'gridDim',
+        'warpSize', 'cudaMalloc', 'cudaFree', 'cudaMemcpy', 'cudaMemset',
+        'cudaMallocHost', 'cudaFreeHost', 'cudaMallocPitch',
+        'cudaMallocArray', 'cudaFreeArray', 'cudaHostAlloc',
+        'cudaHostRegister', 'cudaHostUnregister', 'cudaHostGetDevicePointer',
+        'cudaHostGetFlags', 'cudaDeviceSynchronize', 'cudaDeviceReset',
+        'cudaSetDevice', 'cudaGetDeviceCount', 'cudaGetDeviceProperties',
+        'cudaChooseDevice', 'cudaSetDeviceFlags', 'cudaGetDevice',
+        'cudaStreamCreate', 'cudaStreamDestroy', 'cudaStreamSynchronize',
+        'cudaStreamWaitEvent', 'cudaEventCreate', 'cudaEventDestroy', 'cudaEventRecord',
+        'cudaEventSynchronize', 'cudaEventElapsedTime', 'cuInit', 'cuDeviceGet',
+        'cuDeviceGetCount', 'cuDeviceGetName',
+        'cuDeviceComputeCapability', 'cuCtxCreate', 'cuCtxDestroy',
+        'cuCtxSynchronize', 'cuModuleLoad', 'cuModuleUnload',
+        'cuModuleGetFunction', 'cuModuleGetGlobal', 'cuModuleGetTexRef',
+        'cuMemAlloc', 'cuMemFree', 'cuMemcpyHtoD', 'cuMemcpyDtoH',
+        'cuMemcpyDtoD', 'cuMemcpyHtoDAsync', 'cuMemcpyDtoHAsync',
+        'cuMemcpyDtoDAsync', 'cuMemsetD8', 'cuMemsetD16', 'cuMemsetD32',
+        'cuMemsetD2D8', 'cuMemsetD2D16', 'cuMemsetD2D32', 'cuParamSetSize',
+        'cuParamSeti', 'cuParamSetf', 'cuParamSetv', 'cuLaunch', 'cuLaunchGrid',
+        'cuLaunchGridAsync', 'cuEventCreate', 'cuEventRecord', 'cuEventQuery',
+        'cuEventSynchronize', 'cuEventDestroy', 'cuEventElapsedTime',
+        'cuStreamCreate', 'cuStreamQuery', 'cuStreamSynchronize',
+        'cuStreamDestroy', 'cuFuncSetBlockShape', 'cuFuncSetSharedSize',
+        'cuFuncGetAttribute', 'cuTexRefCreate', 'cuTexRefDestroy',
+        'cuTexRefSetArray', 'cuTexRefSetAddress', 'cuTexRefSetAddress2D',
+        'cuTexRefSetFormat', 'cuTexRefSetAddressMode', 'cuTexRefSetFilterMode',
+        'cuTexRefSetFlags', 'cuTexRefGetAddress', 'cuTexRefGetArray',
+        'cuTexRefGetAddressMode', 'cuTexRefGetFilterMode', 'cuTexRefGetFormat',
+        'cuTexRefGetFlags', 'cuLaunchKernel', 'cuOccupancyMaxActiveBlocksPerMultiprocessor',
+        'cuOccupancyMaxPotentialBlockSize', 'cuOccupancyMaxPotentialBlockSizeWithFlags'
+    ])
 
     def has_clash(self, name, symbols):
         """
diff --git a/pyccel/naming/languagenameclashchecker.py b/pyccel/naming/languagenameclashchecker.py
index fa672a905b..d6415e6449 100644
--- a/pyccel/naming/languagenameclashchecker.py
+++ b/pyccel/naming/languagenameclashchecker.py
@@ -19,6 +19,11 @@ class LanguageNameClashChecker(metaclass = Singleton):
     """
     keywords = None
 
+    def __init__(self): #pylint: disable=useless-parent-delegation
+        # This __init__ function is required so the ArgumentSingleton can
+        # always detect a signature
+        super().__init__()
+
     def _get_collisionless_name(self, name, symbols):
         """
         Get a name which doesn't collision with keywords or symbols.

From 9234e99958bd2b53b74f26670907465aee521302 Mon Sep 17 00:00:00 2001
From: Said Mazouz <95222894+smazouz42@users.noreply.github.com>
Date: Wed, 3 Jul 2024 18:04:22 +0100
Subject: [PATCH 041/130] add handle for custom device (#61)

This pull request addresses issue
https://github.com/pyccel/pyccel-cuda/issues/41 by implementing a new
feature in Pyccel that allows users to define custom device functions.

**Commit Summary**

- Adding handler for custom device and its code generation.
- Adding test

---------

Co-authored-by: EmilyBourne <louise.bourne@gmail.com>
---
 CHANGELOG.md                               |  1 +
 docs/cuda.md                               | 25 ++++++++++++++++-
 pyccel/codegen/printing/cucode.py          |  7 ++---
 pyccel/decorators.py                       | 19 +++++++++++++
 pyccel/errors/messages.py                  |  2 +-
 pyccel/parser/semantic.py                  |  7 ++++-
 tests/cuda/test_device_semantic.py         | 31 ++++++++++++++++++++++
 tests/pyccel/scripts/kernel/device_test.py | 18 +++++++++++++
 tests/pyccel/test_pyccel.py                |  8 ++++++
 9 files changed, 112 insertions(+), 6 deletions(-)
 create mode 100644 tests/cuda/test_device_semantic.py
 create mode 100644 tests/pyccel/scripts/kernel/device_test.py

diff --git a/CHANGELOG.md b/CHANGELOG.md
index afdabc3ab7..d5523ac5d7 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -10,6 +10,7 @@ All notable changes to this project will be documented in this file.
 -   #59 : Updated `cuda` clash checker.
 -   #42 : Add support for custom kernel in`cuda`.
 -   #42 : Add Cuda module to Pyccel. Add support for `cuda.synchronize` function.
+-   #41 : Add support for custom device in`cuda`.
 
 ## \[UNRELEASED\]
 
diff --git a/docs/cuda.md b/docs/cuda.md
index de30d52b80..7643a4ac02 100644
--- a/docs/cuda.md
+++ b/docs/cuda.md
@@ -20,4 +20,27 @@ threadsperblock = 1
 # Call your kernel function
 my_kernel[blockspergrid, threadsperblock]()
 
-```
\ No newline at end of file
+```
+
+### device
+
+Device functions are similar to kernels, but are executed within the context of a kernel. They can be called only from kernels or device functions, and are typically used for operations that are too small to justify launching a separate kernel, or for operations that need to be performed repeatedly within the context of a kernel.
+
+```python
+from pyccel.decorators import device, kernel
+
+@device
+def add(x : int, y : int):
+    return x + y
+
+@kernel
+def my_kernel():
+    x = 1
+    y = 2
+    z = add(x, y)
+    print(z)
+
+my_kernel[1, 1]()
+
+```
+
diff --git a/pyccel/codegen/printing/cucode.py b/pyccel/codegen/printing/cucode.py
index cd26843017..7c01d93c47 100644
--- a/pyccel/codegen/printing/cucode.py
+++ b/pyccel/codegen/printing/cucode.py
@@ -86,9 +86,10 @@ def function_signature(self, expr, print_arg_names = True):
         str
             Signature of the function.
         """
-        cuda_decorater = '__global__' if 'kernel' in expr.decorators else ''
+        cuda_decorator = '__global__' if 'kernel' in expr.decorators else \
+        '__device__' if 'device' in expr.decorators else ''
         c_function_signature = super().function_signature(expr, print_arg_names)
-        return f'{cuda_decorater} {c_function_signature}'
+        return f'{cuda_decorator} {c_function_signature}'
 
     def _print_KernelCall(self, expr):
         func = expr.funcdef
@@ -109,7 +110,7 @@ def _print_ModuleHeader(self, expr):
         cuda_headers = ""
         for f in expr.module.funcs:
             if not f.is_inline:
-                if 'kernel' in f.decorators:  # Checking for 'kernel' decorator
+                if 'kernel' in f.decorators or 'device' in f.decorators:
                     cuda_headers += self.function_signature(f) + ';\n'
                 else:
                     funcs += self.function_signature(f) + ';\n'
diff --git a/pyccel/decorators.py b/pyccel/decorators.py
index 77717a991f..ff413fe443 100644
--- a/pyccel/decorators.py
+++ b/pyccel/decorators.py
@@ -11,6 +11,7 @@
 __all__ = (
     'allow_negative_index',
     'bypass',
+    'device',
     'elemental',
     'inline',
     'private',
@@ -141,3 +142,21 @@ def __getitem__(self, args):
             return self._f
 
     return KernelAccessor(f)
+
+def device(f):
+    """
+    Decorator for marking a function as a GPU device function.
+
+    This decorator is used to mark a Python function as a GPU device function.
+
+    Parameters
+    ----------
+    f : Function
+        The function to be marked as a device.
+
+    Returns
+    -------
+    f
+        The function marked as a device.
+    """
+    return f
diff --git a/pyccel/errors/messages.py b/pyccel/errors/messages.py
index 09966d810c..5fe622c29b 100644
--- a/pyccel/errors/messages.py
+++ b/pyccel/errors/messages.py
@@ -166,7 +166,7 @@
 INVALID_KERNEL_LAUNCH_CONFIG = 'Expected exactly 2 parameters for kernel launch'
 INVALID_KERNEL_CALL_BP_GRID = 'Invalid Block per grid parameter for Kernel call'
 INVALID_KERNEL_CALL_TP_BLOCK = 'Invalid Thread per Block parameter for Kernel call'
-
+INVAlID_DEVICE_CALL = 'A function decorated with "device" should be called only from a "kernel" or another "device" function.'
 
 
 
diff --git a/pyccel/parser/semantic.py b/pyccel/parser/semantic.py
index fde10d6317..7e8dd11bb4 100644
--- a/pyccel/parser/semantic.py
+++ b/pyccel/parser/semantic.py
@@ -136,9 +136,10 @@
         UNUSED_DECORATORS, UNSUPPORTED_POINTER_RETURN_VALUE, PYCCEL_RESTRICTION_OPTIONAL_NONE,
         PYCCEL_RESTRICTION_PRIMITIVE_IMMUTABLE, PYCCEL_RESTRICTION_IS_ISNOT,
         FOUND_DUPLICATED_IMPORT, UNDEFINED_WITH_ACCESS, MACRO_MISSING_HEADER_OR_FUNC, PYCCEL_RESTRICTION_INHOMOG_SET,
-        MISSING_KERNEL_CONFIGURATION,
+        MISSING_KERNEL_CONFIGURATION, INVAlID_DEVICE_CALL,
         INVALID_KERNEL_LAUNCH_CONFIG, INVALID_KERNEL_CALL_BP_GRID, INVALID_KERNEL_CALL_TP_BLOCK)
 
+
 from pyccel.parser.base      import BasicParser
 from pyccel.parser.syntactic import SyntaxParser
 
@@ -1061,6 +1062,10 @@ def _handle_function(self, expr, func, args, *, is_method = False, use_build_fun
         FunctionCall/PyccelFunction
             The semantic representation of the call.
         """
+
+        if isinstance(func, FunctionDef) and 'device' in func.decorators:
+            if 'kernel' not in self.scope.decorators and 'device' not in self.scope.decorators:
+                errors.report(INVAlID_DEVICE_CALL,symbol=expr, severity='fatal')
         if isinstance(func, PyccelFunctionDef):
             if use_build_functions:
                 annotation_method = '_build_' + func.cls_name.__name__
diff --git a/tests/cuda/test_device_semantic.py b/tests/cuda/test_device_semantic.py
new file mode 100644
index 0000000000..5723991961
--- /dev/null
+++ b/tests/cuda/test_device_semantic.py
@@ -0,0 +1,31 @@
+# pylint: disable=missing-function-docstring, missing-module-docstring
+import pytest
+
+from pyccel import epyccel
+from pyccel.decorators import device
+from pyccel.errors.errors import Errors, PyccelSemanticError
+from pyccel.errors.messages import (INVAlID_DEVICE_CALL,)
+
+
+@pytest.mark.cuda
+def test_invalid_device_call():
+    def invalid_device_call():
+        @device
+        def device_call():
+            pass
+        def fake_kernel_call():
+            device_call()
+
+        fake_kernel_call()
+
+    errors = Errors()
+
+    with pytest.raises(PyccelSemanticError):
+        epyccel(invalid_device_call, language="cuda")
+
+    assert errors.has_errors()
+
+    assert errors.num_messages() == 1
+
+    error_info = [*errors.error_info_map.values()][0][0]
+    assert INVAlID_DEVICE_CALL == error_info.message
diff --git a/tests/pyccel/scripts/kernel/device_test.py b/tests/pyccel/scripts/kernel/device_test.py
new file mode 100644
index 0000000000..a4762a6242
--- /dev/null
+++ b/tests/pyccel/scripts/kernel/device_test.py
@@ -0,0 +1,18 @@
+# pylint: disable=missing-function-docstring, missing-module-docstring
+from pyccel.decorators import device, kernel
+from pyccel import cuda
+
+@device
+def device_call():
+    print("Hello from device")
+
+@kernel
+def kernel_call():
+    device_call()
+
+def f():
+    kernel_call[1,1]()
+    cuda.synchronize()
+
+if __name__ == '__main__':
+    f()
diff --git a/tests/pyccel/test_pyccel.py b/tests/pyccel/test_pyccel.py
index b4757a3c31..2d55c6e1cb 100644
--- a/tests/pyccel/test_pyccel.py
+++ b/tests/pyccel/test_pyccel.py
@@ -742,6 +742,14 @@ def test_kernel_collision(gpu_available):
     pyccel_test("scripts/kernel/kernel_name_collision.py",
             language="cuda", execute_code=gpu_available)
 
+#------------------------------------------------------------------------------
+
+@pytest.mark.cuda
+def test_device_call(gpu_available):
+    types = str
+    pyccel_test("scripts/kernel/device_test.py",
+            language="cuda", output_dtype=types, execute_code=gpu_available)
+
 #------------------------------------------------------------------------------
 def test_print_strings(language):
     types = str

From c79b56d33e8f5a5239d79996fceb069cab71a163 Mon Sep 17 00:00:00 2001
From: smazouz42 <smazouz@student.1337.ma>
Date: Mon, 15 Jul 2024 14:50:01 +0100
Subject: [PATCH 042/130] work in progress

---
 pyccel/ast/cudaext.py                        |  6 ++--
 pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.cu | 33 +++++++-------------
 2 files changed, 15 insertions(+), 24 deletions(-)

diff --git a/pyccel/ast/cudaext.py b/pyccel/ast/cudaext.py
index e107b6fe6f..4d3be8cb3f 100644
--- a/pyccel/ast/cudaext.py
+++ b/pyccel/ast/cudaext.py
@@ -88,7 +88,7 @@ def __init__(self, shape, fill_value, dtype='float', order='C'):
         self._shape = shape
         rank = len(self._shape)
         order = CudaNewarray._process_order(rank, order)
-        class_type = CudaArrayType(dtype, rank, order, 'device')
+        class_type = CudaArrayType(dtype, rank, order, 'host')
         super().__init__(fill_value, class_type = class_type, init_dtype = init_dtype, memory_location = 'device')
     @property
     def fill_value(self):
@@ -102,7 +102,7 @@ class CudaAutoFill(CudaFull):
     def __init__(self, shape, dtype='float', order='C'):
         super().__init__(shape, Nil(), dtype, order)
 
-class CudaEmpty(CudaAutoFill):
+class CudaHostEmpty(CudaAutoFill):
     """
     Represents a call to  Cuda.host_empty for code generation.
 
@@ -149,7 +149,7 @@ def __init__(self):
 cuda_funcs = {
     'synchronize'       : PyccelFunctionDef('synchronize' , CudaSynchronize),
     'full'              : PyccelFunctionDef('full' , CudaFull),
-    'empty'             : PyccelFunctionDef('empty' , CudaEmpty),
+    'host_empty'             : PyccelFunctionDef('host_empty' , CudaHostEmpty),
 }
 
 cuda_mod = Module('cuda',
diff --git a/pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.cu b/pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.cu
index 34890002f3..47b0e5d420 100644
--- a/pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.cu
+++ b/pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.cu
@@ -5,20 +5,15 @@ void    device_memory(void** devPtr, size_t size)
     cudaMalloc(devPtr, size);
 }
 
-void    managed_memory(void** devPtr, size_t size)
-{
-    cudaMallocManaged(devPtr, size);
-}
-
 void    host_memory(void** devPtr, size_t size)
 {
     cudaMallocHost(devPtr, size);
 }
-t_ndarray    cuda_array_create(enum e_memory_locations location, int32_t nd, int64_t *shape,
-        enum e_types type, bool is_view)
+t_ndarray   cuda_array_create(int32_t nd, int64_t *shape, enum e_types type, bool is_view ,
+enum e_memory_locations location)
 {
     t_ndarray  arr;
-    void (*fun_ptr_arr[])(void**, size_t) = {managed_memory, host_memory, device_memory};
+    void (*fun_ptr_arr[])(void**, size_t) = {host_memory, device_memory};
 
     arr.nd = nd;
     arr.type = type;
@@ -48,14 +43,20 @@ t_ndarray    cuda_array_create(enum e_memory_locations location, int32_t nd, int
     }
     arr.is_view = is_view;
     arr.length = 1;
-    arr.shape = (int64_t *)malloc(arr.nd * sizeof(int64_t));
+    cudaMallocManaged(&(arr.shape), arr.nd * sizeof(int64_t));
     for (int32_t i = 0; i < arr.nd; i++)
     {
         arr.length *= shape[i];
         arr.shape[i] = shape[i];
     }
     arr.buffer_size = arr.length * arr.type_size;
-
+    cudaMallocManaged(&(arr.strides), nd * sizeof(int64_t));
+    for (int32_t i = 0; i < arr.nd; i++)
+    {
+        arr.strides[i] = 1;
+        for (int32_t j = i + 1; j < arr.nd; j++)
+            arr.strides[i] *= arr.shape[j];
+    }
     if (!is_view)
         (*fun_ptr_arr[location])(&(arr.raw_data), arr.buffer_size);
     return (arr);
@@ -82,14 +83,4 @@ int32_t cuda_free(t_ndarray  arr)
     cudaFree(arr.shape);
     arr.shape = NULL;
     return (0);
-}
-
-__host__ __device__
-int32_t cuda_free_pointer(t_ndarray  arr)
-{
-    if (arr.is_view == false || arr.shape == NULL)
-        return (0);
-    cudaFree(arr.shape);
-    arr.shape = NULL;
-    return (0);
-}
+}
\ No newline at end of file

From 1c7ec43aa68e273e16572da7680bf9d6c6b909c3 Mon Sep 17 00:00:00 2001
From: smazouz42 <smazouz@student.1337.ma>
Date: Mon, 15 Jul 2024 22:28:12 +0100
Subject: [PATCH 043/130] work in progress

---
 pyccel/codegen/pipeline.py             | 2 +-
 pyccel/codegen/printing/ccode.py       | 1 +
 pyccel/codegen/printing/codeprinter.py | 2 +-
 pyccel/codegen/printing/cucode.py      | 1 -
 4 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/pyccel/codegen/pipeline.py b/pyccel/codegen/pipeline.py
index eb357fab74..f3e3be1602 100644
--- a/pyccel/codegen/pipeline.py
+++ b/pyccel/codegen/pipeline.py
@@ -404,7 +404,7 @@ def get_module_dependencies(parser, deps):
                     verbose=verbose)
 
         timers["Compilation without wrapper"] = time.time() - start_compile_target_language
-
+        print(100*'-')
         # Create shared library
         generated_filepath, shared_lib_timers = create_shared_library(codegen,
                                                mod_obj,
diff --git a/pyccel/codegen/printing/ccode.py b/pyccel/codegen/printing/ccode.py
index d81ed35293..dd16523284 100644
--- a/pyccel/codegen/printing/ccode.py
+++ b/pyccel/codegen/printing/ccode.py
@@ -1354,6 +1354,7 @@ def _print_FuncAddressDeclare(self, expr):
         return f'{ret_type} (*{name})({arg_code});\n'
 
     def _print_Declare(self, expr):
+        print("1")
         if isinstance(expr.variable, InhomogeneousTupleVariable):
             return ''.join(self._print_Declare(Declare(v,intent=expr.intent, static=expr.static)) for v in expr.variable)
 
diff --git a/pyccel/codegen/printing/codeprinter.py b/pyccel/codegen/printing/codeprinter.py
index 2dbcde1069..566cb4af7c 100644
--- a/pyccel/codegen/printing/codeprinter.py
+++ b/pyccel/codegen/printing/codeprinter.py
@@ -50,7 +50,6 @@ def doprint(self, expr):
 
         # Do the actual printing
         lines = self._print(expr).splitlines(True)
-
         # Format the output
         return ''.join(self._format_code(lines))
 
@@ -83,6 +82,7 @@ def _print(self, expr):
 
         classes = type(expr).__mro__
         for cls in classes:
+            print('_print_' + cls.__name__)
             print_method = '_print_' + cls.__name__
             if hasattr(self, print_method):
                 obj = getattr(self, print_method)(expr)
diff --git a/pyccel/codegen/printing/cucode.py b/pyccel/codegen/printing/cucode.py
index 6d8a6bc305..613665d587 100644
--- a/pyccel/codegen/printing/cucode.py
+++ b/pyccel/codegen/printing/cucode.py
@@ -180,7 +180,6 @@ def get_declare_type(self, expr):
         if not isinstance(class_type, CudaArrayType ) or rank <= 0:
             return super().get_declare_type(expr)
         self.add_import(c_imports['ndarrays'])
-
         dtype = 't_ndarray '
         return dtype
 

From ba103eeb84982134a3cfc27b766cbd1b2ed8841e Mon Sep 17 00:00:00 2001
From: smazouz42 <smazouz@student.1337.ma>
Date: Tue, 16 Jul 2024 15:36:25 +0100
Subject: [PATCH 044/130] work in progress

---
 pyccel/codegen/printing/ccode.py       |  3 +--
 pyccel/codegen/printing/codeprinter.py |  1 -
 pyccel/stdlib/ndarrays/ndarrays.c      | 25 +++++++++++++++++++------
 pyccel/stdlib/ndarrays/ndarrays.h      |  7 +++++++
 4 files changed, 27 insertions(+), 9 deletions(-)

diff --git a/pyccel/codegen/printing/ccode.py b/pyccel/codegen/printing/ccode.py
index dd16523284..85b5071b18 100644
--- a/pyccel/codegen/printing/ccode.py
+++ b/pyccel/codegen/printing/ccode.py
@@ -1317,7 +1317,7 @@ def get_declare_type(self, expr):
             if isinstance(expr.class_type, (HomogeneousSetType, HomogeneousListType)):
                 dtype = self.get_c_type(expr.class_type)
                 return dtype
-            if isinstance(expr.class_type,(HomogeneousTupleType, NumpyNDArrayType)):
+            if isinstance(expr.class_type,(HomogeneousTupleType, NumpyNDArrayType, CudaArrayType)):
                 if expr.rank > 15:
                     errors.report(UNSUPPORTED_ARRAY_RANK, symbol=expr, severity='fatal')
                 self.add_import(c_imports['ndarrays'])
@@ -1354,7 +1354,6 @@ def _print_FuncAddressDeclare(self, expr):
         return f'{ret_type} (*{name})({arg_code});\n'
 
     def _print_Declare(self, expr):
-        print("1")
         if isinstance(expr.variable, InhomogeneousTupleVariable):
             return ''.join(self._print_Declare(Declare(v,intent=expr.intent, static=expr.static)) for v in expr.variable)
 
diff --git a/pyccel/codegen/printing/codeprinter.py b/pyccel/codegen/printing/codeprinter.py
index 566cb4af7c..c9d2b1a9e8 100644
--- a/pyccel/codegen/printing/codeprinter.py
+++ b/pyccel/codegen/printing/codeprinter.py
@@ -82,7 +82,6 @@ def _print(self, expr):
 
         classes = type(expr).__mro__
         for cls in classes:
-            print('_print_' + cls.__name__)
             print_method = '_print_' + cls.__name__
             if hasattr(self, print_method):
                 obj = getattr(self, print_method)(expr)
diff --git a/pyccel/stdlib/ndarrays/ndarrays.c b/pyccel/stdlib/ndarrays/ndarrays.c
index bceaeea429..784b222cbb 100644
--- a/pyccel/stdlib/ndarrays/ndarrays.c
+++ b/pyccel/stdlib/ndarrays/ndarrays.c
@@ -46,6 +46,7 @@ void print_ndarray_memory(t_ndarray nd)
             case nd_bool:
                 printf("[%d]", nd.nd_bool[i]);
                 break;
+            #ifndef __NVCC__
             case nd_cfloat:
             {
                 double real = creal(nd.nd_cfloat[i]);
@@ -60,6 +61,8 @@ void print_ndarray_memory(t_ndarray nd)
                 printf("[%lf%+lfj]", real, imag);
                 break;
             }
+            #endif
+
         }
         ++i;
     }
@@ -248,7 +251,7 @@ void   _array_fill_double(double c, t_ndarray arr)
         for (int32_t i = 0; i < arr.length; i++)
             arr.nd_double[i] = c;
 }
-
+#ifndef __NVCC__
 void   _array_fill_cfloat(float complex c, t_ndarray arr)
 {
     if (c == 0)
@@ -267,6 +270,7 @@ void   _array_fill_cdouble(double complex c, t_ndarray arr)
         for (int32_t i = 0; i < arr.length; i++)
             arr.nd_cdouble[i] = c;
 }
+#endif
 
 /*
 ** deallocation
@@ -584,24 +588,24 @@ bool is_same_shape(t_ndarray a, t_ndarray b)
                 if(elem_wise_cp == false)\
                 { \
                     for(int64_t i = 0; i < src.length; i++) \
-                        dest->nd_cfloat[i + offset] = (float complex)src.nd_##SRC_TYPE[i]; \
+                        dest->nd_cfloat[i + offset] = src.nd_cfloat[i]; \
                 }\
                 else \
                 {\
                     for(int64_t i = 0; i < src.length; i++) \
-                        dest->nd_cfloat[element_index(*dest, i, dest->nd) + offset] = (float complex)src.nd_##SRC_TYPE[element_index(src, i, src.nd)]; \
+                        dest->nd_cfloat[element_index(*dest, i, dest->nd) + offset] = src.nd_cfloat[element_index(src, i, src.nd)]; \
                 }\
                 break; \
             case nd_cdouble: \
                 if(elem_wise_cp == false)\
                 { \
                     for(int64_t i = 0; i < src.length; i++) \
-                        dest->nd_cdouble[i + offset] = (double complex)src.nd_##SRC_TYPE[i]; \
+                        dest->nd_cdouble[i + offset] = src.nd_cdouble[i]; \
                 }\
                 else \
                 {\
                     for(int64_t i = 0; i < src.length; i++) \
-                        dest->nd_cdouble[element_index(*dest, i, dest->nd) + offset] = (double complex)src.nd_##SRC_TYPE[element_index(src, i, src.nd)]; \
+                        dest->nd_cdouble[element_index(*dest, i, dest->nd) + offset] = src.nd_cdouble[element_index(src, i, src.nd)]; \
                 }\
                 break; \
         } \
@@ -614,8 +618,10 @@ COPY_DATA_FROM_(int32)
 COPY_DATA_FROM_(int64)
 COPY_DATA_FROM_(float)
 COPY_DATA_FROM_(double)
+#ifndef __NVCC__
 COPY_DATA_FROM_(cfloat)
 COPY_DATA_FROM_(cdouble)
+#endif
 
 void copy_data(t_ndarray **ds, t_ndarray src, uint32_t offset, bool elem_wise_cp)
 {
@@ -648,7 +654,7 @@ void copy_data(t_ndarray **ds, t_ndarray src, uint32_t offset, bool elem_wise_cp
         case nd_double:
             copy_data_from_double(ds, src, offset, elem_wise_cp);
             break;
-
+        #ifndef __NVCC__
         case nd_cfloat:
             copy_data_from_cfloat(ds, src, offset, elem_wise_cp);
             break;
@@ -656,6 +662,7 @@ void copy_data(t_ndarray **ds, t_ndarray src, uint32_t offset, bool elem_wise_cp
         case nd_cdouble:
             copy_data_from_cdouble(ds, src, offset, elem_wise_cp);
             break;
+        #endif
     }
 }
 
@@ -747,8 +754,10 @@ NUMPY_SUM_(int32, int64_t, int32)
 NUMPY_SUM_(int64, int64_t, int64)
 NUMPY_SUM_(float32, float, float)
 NUMPY_SUM_(float64, double, double)
+#ifndef __NVCC__
 NUMPY_SUM_(complex64, float complex, cfloat)
 NUMPY_SUM_(complex128, double complex, cdouble)
+#endif
 
 #define NUMPY_AMAX_(NAME, TYPE, CTYPE) \
     TYPE numpy_amax_##NAME(t_ndarray arr) \
@@ -782,8 +791,10 @@ NUMPY_AMAX_(int32, int64_t, int32)
 NUMPY_AMAX_(int64, int64_t, int64)
 NUMPY_AMAX_(float32, float, float)
 NUMPY_AMAX_(float64, double, double)
+#ifndef __NVCC__
 NUMPY_AMAX_(complex64, float complex, cfloat)
 NUMPY_AMAX_(complex128, double complex, cdouble)
+#endif
 
 #define NUMPY_AMIN_(NAME, TYPE, CTYPE) \
     TYPE numpy_amin_##NAME(t_ndarray arr) \
@@ -817,6 +828,8 @@ NUMPY_AMIN_(int32, int64_t, int32)
 NUMPY_AMIN_(int64, int64_t, int64)
 NUMPY_AMIN_(float32, float, float)
 NUMPY_AMIN_(float64, double, double)
+#ifndef __NVCC__
 NUMPY_AMIN_(complex64, float complex, cfloat)
 NUMPY_AMIN_(complex128, double complex, cdouble)
+#endif
 
diff --git a/pyccel/stdlib/ndarrays/ndarrays.h b/pyccel/stdlib/ndarrays/ndarrays.h
index 9764113705..2e1b8e793d 100644
--- a/pyccel/stdlib/ndarrays/ndarrays.h
+++ b/pyccel/stdlib/ndarrays/ndarrays.h
@@ -10,6 +10,7 @@
 # include <stdbool.h>
 # include <stdint.h>
 # include <math.h>
+#include <cuComplex.h>
 
 /* mapping the function array_fill to the correct type */
 # define array_fill(c, arr) _Generic((c), int64_t : _array_fill_int64,\
@@ -93,8 +94,14 @@ typedef struct  s_ndarray
             float           *nd_float;
             double          *nd_double;
             bool            *nd_bool;
+        #ifndef __NVCC__
             double complex  *nd_cdouble;
             float  complex  *nd_cfloat;
+        #endif
+        #ifdef __NVCC__
+            cuDoubleComplex         *nd_cdouble;
+            cuFloatComplex          *nd_cfloat;
+        #endif
             };
     /* number of dimensions */
     int32_t                 nd;

From 947ce32bdff7eee993510a3f315e408d1dab9e48 Mon Sep 17 00:00:00 2001
From: smazouz42 <smazouz@student.1337.ma>
Date: Tue, 16 Jul 2024 23:08:47 +0100
Subject: [PATCH 045/130] work in progress

---
 pyccel/ast/cudaext.py      | 13 +++++++------
 pyccel/codegen/pipeline.py |  1 -
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/pyccel/ast/cudaext.py b/pyccel/ast/cudaext.py
index 4d3be8cb3f..67648aebd6 100644
--- a/pyccel/ast/cudaext.py
+++ b/pyccel/ast/cudaext.py
@@ -77,7 +77,7 @@ class CudaFull(CudaNewarray):
     __slots__ = ('_fill_value','_shape')
     name = 'full'
 
-    def __init__(self, shape, fill_value, dtype='float', order='C'):
+    def __init__(self, shape, fill_value, dtype, order, memory_location):
         shape = process_shape(False, shape)
         init_dtype = dtype
         if(dtype is None):
@@ -88,8 +88,8 @@ def __init__(self, shape, fill_value, dtype='float', order='C'):
         self._shape = shape
         rank = len(self._shape)
         order = CudaNewarray._process_order(rank, order)
-        class_type = CudaArrayType(dtype, rank, order, 'host')
-        super().__init__(fill_value, class_type = class_type, init_dtype = init_dtype, memory_location = 'device')
+        class_type = CudaArrayType(dtype, rank, order, memory_location)
+        super().__init__(fill_value, class_type = class_type, init_dtype = init_dtype, memory_location = memory_location)
     @property
     def fill_value(self):
         return self._args[0]
@@ -99,8 +99,8 @@ class CudaAutoFill(CudaFull):
         the fill_value is implicitly specified
     """
     __slots__ = ()
-    def __init__(self, shape, dtype='float', order='C'):
-        super().__init__(shape, Nil(), dtype, order)
+    def __init__(self, shape, dtype, order, memory_location):
+        super().__init__(shape, Nil(), dtype, order, memory_location = memory_location)
 
 class CudaHostEmpty(CudaAutoFill):
     """
@@ -122,7 +122,8 @@ class CudaHostEmpty(CudaAutoFill):
     __slots__ = ()
     name = 'empty'
     def __init__(self, shape, dtype='float', order='C'):
-        super().__init__(shape, dtype, order)
+        memory_location = 'host'
+        super().__init__(shape, dtype, order , memory_location)
     
     @property
     def fill_value(self):
diff --git a/pyccel/codegen/pipeline.py b/pyccel/codegen/pipeline.py
index f3e3be1602..ff8d657704 100644
--- a/pyccel/codegen/pipeline.py
+++ b/pyccel/codegen/pipeline.py
@@ -404,7 +404,6 @@ def get_module_dependencies(parser, deps):
                     verbose=verbose)
 
         timers["Compilation without wrapper"] = time.time() - start_compile_target_language
-        print(100*'-')
         # Create shared library
         generated_filepath, shared_lib_timers = create_shared_library(codegen,
                                                mod_obj,

From 1eeed75b63ba91fbfa04e65d819f3009de0a2daa Mon Sep 17 00:00:00 2001
From: smazouz42 <smazouz@student.1337.ma>
Date: Wed, 17 Jul 2024 10:00:53 +0100
Subject: [PATCH 046/130] adding test for host array

---
 tests/pyccel/scripts/kernel/host_array.py | 10 ++++++++++
 tests/pyccel/test_pyccel.py               |  5 +++++
 2 files changed, 15 insertions(+)
 create mode 100644 tests/pyccel/scripts/kernel/host_array.py

diff --git a/tests/pyccel/scripts/kernel/host_array.py b/tests/pyccel/scripts/kernel/host_array.py
new file mode 100644
index 0000000000..b682e0cbbf
--- /dev/null
+++ b/tests/pyccel/scripts/kernel/host_array.py
@@ -0,0 +1,10 @@
+# pylint: disable=missing-function-docstring, missing-module-docstring
+from  pyccel import cuda
+
+a = cuda.host_empty(10, 'int')
+
+for i in range(10):
+    a[i] = 1
+
+if __name__ == '__main__':
+    print(a)
diff --git a/tests/pyccel/test_pyccel.py b/tests/pyccel/test_pyccel.py
index 2d55c6e1cb..e3d98ef3ef 100644
--- a/tests/pyccel/test_pyccel.py
+++ b/tests/pyccel/test_pyccel.py
@@ -743,6 +743,11 @@ def test_kernel_collision(gpu_available):
             language="cuda", execute_code=gpu_available)
 
 #------------------------------------------------------------------------------
+@pytest.mark.cuda
+def test_device_array(gpu_available):
+    types = str
+    pyccel_test("scripts/kernel/host_array.py",
+            language="cuda", output_dtype=types, execute_code=gpu_available)
 
 @pytest.mark.cuda
 def test_device_call(gpu_available):

From a8dbc18f5d1698675ea01079bc30c2c0ed9d7545 Mon Sep 17 00:00:00 2001
From: smazouz42 <smazouz@student.1337.ma>
Date: Wed, 17 Jul 2024 10:32:27 +0100
Subject: [PATCH 047/130] add documentation for CUDA arrays

---
 docs/cuda.md | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/docs/cuda.md b/docs/cuda.md
index 7643a4ac02..4c7a1e8370 100644
--- a/docs/cuda.md
+++ b/docs/cuda.md
@@ -44,3 +44,28 @@ my_kernel[1, 1]()
 
 ```
 
+## Cuda Arrays
+
+Pyccel provides support for CUDA arrays, enabling efficient data transfer between the host and the GPU device. Here are some of the key functions you can use:
+
+### cuda.host_empty
+
+The `cuda.host_empty` function allocates an empty array on the host.
+
+```python
+from  pyccel import cuda
+
+a = cuda.host_empty(10, 'int')
+
+for i in range(10):
+    a[i] = 1
+
+if __name__ == '__main__':
+    print(a)
+```
+
+
+
+
+
+

From 5d91031b05e8ccb350b08e3bf42d46857df0e7c1 Mon Sep 17 00:00:00 2001
From: smazouz42 <smazouz@student.1337.ma>
Date: Wed, 17 Jul 2024 10:33:42 +0100
Subject: [PATCH 048/130] fix: remove unnecessary spaces

---
 docs/cuda.md | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/docs/cuda.md b/docs/cuda.md
index 4c7a1e8370..ff68b5c69a 100644
--- a/docs/cuda.md
+++ b/docs/cuda.md
@@ -63,9 +63,3 @@ for i in range(10):
 if __name__ == '__main__':
     print(a)
 ```
-
-
-
-
-
-

From 7c74bbd9beb9277b3035801116bdea1d9c21600e Mon Sep 17 00:00:00 2001
From: smazouz42 <smazouz@student.1337.ma>
Date: Wed, 17 Jul 2024 10:56:55 +0100
Subject: [PATCH 049/130] refactoring the code

---
 pyccel/ast/cudaext.py             | 41 ++++++++++++++++++++++---------
 pyccel/ast/cudatypes.py           |  2 +-
 pyccel/codegen/printing/cucode.py |  4 +--
 3 files changed, 31 insertions(+), 16 deletions(-)

diff --git a/pyccel/ast/cudaext.py b/pyccel/ast/cudaext.py
index 67648aebd6..c557e27f00 100644
--- a/pyccel/ast/cudaext.py
+++ b/pyccel/ast/cudaext.py
@@ -14,8 +14,7 @@
 from .datatypes      import VoidType
 from .core           import Module, PyccelFunctionDef
 from .internals      import PyccelFunction
-from .internals      import LiteralInteger
-from .numpyext       import process_dtype, process_shape , DtypePrecisionToCastFunction
+from .numpyext       import process_dtype, process_shape
 from .cudatypes      import CudaArrayType
 
 
@@ -46,6 +45,7 @@ class CudaNewarray(PyccelFunction):
         The memory location of the new array ('host' or 'device').
     """
     __slots__ = ('_class_type', '_init_dtype', '_memory_location')
+    name = 'newarray'
 
     property
     def init_dtype(self):
@@ -63,17 +63,34 @@ def __init__(self, *arg,class_type, init_dtype, memory_location):
         self._memory_location = memory_location
 
         super().__init__(*arg)
-    @staticmethod
-    def _process_order(rank, order):
-
-        if rank < 2:
-            return None
-        order = str(order).strip('\'"')
-        assert order in ('C', 'F')
-        return order
 
 class CudaFull(CudaNewarray):
-  
+    """
+    Represents a call to `cuda.full` for code generation.
+    
+    Represents a call to the Cuda function `full` which creates an array
+    of a specified size and shape filled with a specified value.
+    Parameters
+    ----------
+    shape : TypedAstNode
+        Shape of the new array, e.g., ``(2, 3)`` or ``2``.
+        For a 1D array this is either a `LiteralInteger` or an expression.
+        For a ND array this is a `TypedAstNode` with the class type HomogeneousTupleType.
+
+    fill_value : TypedAstNode
+        Fill value.
+
+    dtype : PythonType, PyccelFunctionDef, LiteralString, str, optional
+        Datatype for the constructed array.
+        If `None` the dtype of the fill value is used.
+
+    order : {'C', 'F'}, optional
+        Whether to store multidimensional data in C- or Fortran-contiguous
+        (row- or column-wise) order in memory.
+
+    memory_location : str
+        The memory location of the new array ('host' or 'device').
+    """
     __slots__ = ('_fill_value','_shape')
     name = 'full'
 
@@ -87,7 +104,6 @@ def __init__(self, shape, fill_value, dtype, order, memory_location):
 
         self._shape = shape
         rank = len(self._shape)
-        order = CudaNewarray._process_order(rank, order)
         class_type = CudaArrayType(dtype, rank, order, memory_location)
         super().__init__(fill_value, class_type = class_type, init_dtype = init_dtype, memory_location = memory_location)
     @property
@@ -99,6 +115,7 @@ class CudaAutoFill(CudaFull):
         the fill_value is implicitly specified
     """
     __slots__ = ()
+    name = 'auto_fill'
     def __init__(self, shape, dtype, order, memory_location):
         super().__init__(shape, Nil(), dtype, order, memory_location = memory_location)
 
diff --git a/pyccel/ast/cudatypes.py b/pyccel/ast/cudatypes.py
index 29952c3efc..5f7ca5554f 100644
--- a/pyccel/ast/cudatypes.py
+++ b/pyccel/ast/cudatypes.py
@@ -9,7 +9,7 @@
 from functools import lru_cache
 import numpy as np
 
-from .datatypes import FixedSizeNumericType, HomogeneousContainerType, PythonNativeBool
+from .datatypes import FixedSizeNumericType, HomogeneousContainerType
 from pyccel.utilities.metaclasses import ArgumentSingleton
 from .datatypes import pyccel_type_to_original_type, original_type_to_pyccel_type
 from .numpytypes import NumpyNDArrayType
diff --git a/pyccel/codegen/printing/cucode.py b/pyccel/codegen/printing/cucode.py
index 613665d587..5646c15dde 100644
--- a/pyccel/codegen/printing/cucode.py
+++ b/pyccel/codegen/printing/cucode.py
@@ -16,11 +16,9 @@
 
 from pyccel.errors.errors           import Errors
 from pyccel.ast.cudatypes           import CudaArrayType
-from pyccel.ast.datatypes           import HomogeneousContainerType, PythonNativeBool
+from pyccel.ast.datatypes           import HomogeneousContainerType
 from pyccel.ast.numpytypes          import numpy_precision_map
 from pyccel.ast.cudaext             import CudaFull
-from pyccel.ast.numpytypes          import NumpyFloat32Type, NumpyFloat64Type, NumpyComplex64Type, NumpyComplex128Type
-from pyccel.ast.numpytypes          import NumpyInt8Type, NumpyInt16Type, NumpyInt32Type, NumpyInt64Type
 
 errors = Errors()
 

From 95cf8214e828891815b5f6729f0e62fc7a22f24f Mon Sep 17 00:00:00 2001
From: smazouz42 <smazouz@student.1337.ma>
Date: Wed, 17 Jul 2024 11:21:00 +0100
Subject: [PATCH 050/130] refactoring the code

---
 pyccel/ast/cudaext.py             | 8 +++-----
 pyccel/ast/cudatypes.py           | 7 ++++++-
 pyccel/codegen/printing/cucode.py | 2 +-
 3 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/pyccel/ast/cudaext.py b/pyccel/ast/cudaext.py
index c557e27f00..5687e4f179 100644
--- a/pyccel/ast/cudaext.py
+++ b/pyccel/ast/cudaext.py
@@ -13,7 +13,6 @@
 
 from .datatypes      import VoidType
 from .core           import Module, PyccelFunctionDef
-from .internals      import PyccelFunction
 from .numpyext       import process_dtype, process_shape
 from .cudatypes      import CudaArrayType
 
@@ -21,9 +20,9 @@
 
 __all__ = (
     'CudaSynchronize',
-    'CudaNewarray'
-    'CudaFull'
-    'CudaEmpty'
+    'CudaNewarray',
+    'CudaFull',
+    'CudaEmpty',
 )
 
 class CudaNewarray(PyccelFunction):
@@ -141,7 +140,6 @@ class CudaHostEmpty(CudaAutoFill):
     def __init__(self, shape, dtype='float', order='C'):
         memory_location = 'host'
         super().__init__(shape, dtype, order , memory_location)
-    
     @property
     def fill_value(self):
         """
diff --git a/pyccel/ast/cudatypes.py b/pyccel/ast/cudatypes.py
index 5f7ca5554f..872a87f284 100644
--- a/pyccel/ast/cudatypes.py
+++ b/pyccel/ast/cudatypes.py
@@ -29,7 +29,7 @@ class CudaArrayType(HomogeneousContainerType, metaclass = ArgumentSingleton):
     order : str
         The order of the memory layout for the new NumPy array.
     memory_location : str
-        The memory location of the new cuda array.
+        The memory location of the new cuda array ('host' or 'device').
     """
     __slots__ = ('_element_type', '_container_rank', '_order', '_memory_location')
 
@@ -45,6 +45,11 @@ def __init__(self, dtype, rank, order, memory_location):
 
     @property
     def memory_location(self):
+        """
+        The memory location of the new array ('host' or 'device').
+
+        The memory location of the new array ('host' or 'device').
+        """
         return self._memory_location
 
     @lru_cache
diff --git a/pyccel/codegen/printing/cucode.py b/pyccel/codegen/printing/cucode.py
index 5646c15dde..13e0eec0f7 100644
--- a/pyccel/codegen/printing/cucode.py
+++ b/pyccel/codegen/printing/cucode.py
@@ -46,7 +46,7 @@ class CudaCodePrinter(CCodePrinter):
     """
     language = "cuda"
 
-    
+
     def __init__(self, filename, prefix_module = None):
 
         errors.set_target(filename)

From 0fc4a1bca6d881a6afc1c2247f1d15de3ad6dfb8 Mon Sep 17 00:00:00 2001
From: smazouz42 <smazouz@student.1337.ma>
Date: Wed, 17 Jul 2024 11:46:47 +0100
Subject: [PATCH 051/130] refactoring the code

---
 pyccel/ast/cudaext.py             |  3 ---
 pyccel/codegen/printing/cucode.py | 17 +++++++++++++++++
 2 files changed, 17 insertions(+), 3 deletions(-)

diff --git a/pyccel/ast/cudaext.py b/pyccel/ast/cudaext.py
index 5687e4f179..96b205ceee 100644
--- a/pyccel/ast/cudaext.py
+++ b/pyccel/ast/cudaext.py
@@ -105,9 +105,6 @@ def __init__(self, shape, fill_value, dtype, order, memory_location):
         rank = len(self._shape)
         class_type = CudaArrayType(dtype, rank, order, memory_location)
         super().__init__(fill_value, class_type = class_type, init_dtype = init_dtype, memory_location = memory_location)
-    @property
-    def fill_value(self):
-        return self._args[0]
 
 class CudaAutoFill(CudaFull):
     """ Abstract class for all classes which inherit from NumpyFull but
diff --git a/pyccel/codegen/printing/cucode.py b/pyccel/codegen/printing/cucode.py
index 13e0eec0f7..005b2200d5 100644
--- a/pyccel/codegen/printing/cucode.py
+++ b/pyccel/codegen/printing/cucode.py
@@ -173,6 +173,23 @@ def _print_Deallocate(self, expr):
         else:
             return f"cuda_free({var_code});\n"
     def get_declare_type(self, expr):
+        """
+        Get the string which describes the type in a declaration.
+
+        This function returns the code which describes the type
+        of the `expr` object such that the declaration can be written as:
+        `f"{self.get_declare_type(expr)} {expr.name}"`
+
+        Parameters
+        ----------
+        expr : Variable
+            The variable whose type should be described.
+
+        Returns
+        -------
+        str
+            The code describing the type.
+        """
         class_type = expr.class_type
         rank  = expr.rank
         if not isinstance(class_type, CudaArrayType ) or rank <= 0:

From f43fba86148360e482b1d01139cf2702fe59d1fc Mon Sep 17 00:00:00 2001
From: smazouz42 <smazouz@student.1337.ma>
Date: Wed, 17 Jul 2024 11:50:17 +0100
Subject: [PATCH 052/130] refactoring the code

---
 pyccel/ast/cudaext.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pyccel/ast/cudaext.py b/pyccel/ast/cudaext.py
index 96b205ceee..49e3670cf7 100644
--- a/pyccel/ast/cudaext.py
+++ b/pyccel/ast/cudaext.py
@@ -22,7 +22,7 @@
     'CudaSynchronize',
     'CudaNewarray',
     'CudaFull',
-    'CudaEmpty',
+    'CudaHostEmpty'
 )
 
 class CudaNewarray(PyccelFunction):
@@ -46,7 +46,7 @@ class CudaNewarray(PyccelFunction):
     __slots__ = ('_class_type', '_init_dtype', '_memory_location')
     name = 'newarray'
 
-    property
+    @property
     def init_dtype(self):
         """
         The dtype provided to the function when it was initialised in Python.

From f4546e14aceec9f36062bbba78ee4c851f837fb3 Mon Sep 17 00:00:00 2001
From: smazouz42 <smazouz@student.1337.ma>
Date: Wed, 17 Jul 2024 12:13:47 +0100
Subject: [PATCH 053/130] fix a doc problem

---
 pyccel/ast/cudaext.py | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/pyccel/ast/cudaext.py b/pyccel/ast/cudaext.py
index 49e3670cf7..f9fbb8d42a 100644
--- a/pyccel/ast/cudaext.py
+++ b/pyccel/ast/cudaext.py
@@ -33,7 +33,7 @@ class CudaNewarray(PyccelFunction):
     to `Allocate` should inherit.
 
     Parameters
-
+    ----------
     class_type : NumpyNDArrayType
         The type of the new array.
 
@@ -46,7 +46,7 @@ class CudaNewarray(PyccelFunction):
     __slots__ = ('_class_type', '_init_dtype', '_memory_location')
     name = 'newarray'
 
-    @property
+    property
     def init_dtype(self):
         """
         The dtype provided to the function when it was initialised in Python.
@@ -69,6 +69,7 @@ class CudaFull(CudaNewarray):
     
     Represents a call to the Cuda function `full` which creates an array
     of a specified size and shape filled with a specified value.
+
     Parameters
     ----------
     shape : TypedAstNode
@@ -107,8 +108,10 @@ def __init__(self, shape, fill_value, dtype, order, memory_location):
         super().__init__(fill_value, class_type = class_type, init_dtype = init_dtype, memory_location = memory_location)
 
 class CudaAutoFill(CudaFull):
-    """ Abstract class for all classes which inherit from NumpyFull but
-        the fill_value is implicitly specified
+    """
+    Abstract class for all classes which inherit from CudaFull.
+
+    Abstract class for all classes which inherit from CudaFull.
     """
     __slots__ = ()
     name = 'auto_fill'

From ec0132b322624525ddd27fcd29e53522a28d7e99 Mon Sep 17 00:00:00 2001
From: smazouz42 <smazouz@student.1337.ma>
Date: Wed, 17 Jul 2024 14:20:53 +0100
Subject: [PATCH 054/130] fix a doc problem

---
 pyccel/ast/cudaext.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyccel/ast/cudaext.py b/pyccel/ast/cudaext.py
index f9fbb8d42a..fa63357876 100644
--- a/pyccel/ast/cudaext.py
+++ b/pyccel/ast/cudaext.py
@@ -46,7 +46,7 @@ class CudaNewarray(PyccelFunction):
     __slots__ = ('_class_type', '_init_dtype', '_memory_location')
     name = 'newarray'
 
-    property
+    @property
     def init_dtype(self):
         """
         The dtype provided to the function when it was initialised in Python.

From 9030dfe2af87f8a42f5355ca0a331696045f34a7 Mon Sep 17 00:00:00 2001
From: smazouz42 <smazouz@student.1337.ma>
Date: Wed, 17 Jul 2024 14:34:20 +0100
Subject: [PATCH 055/130] fix a doc problem

---
 pyccel/ast/cudaext.py | 19 ++++++-------------
 1 file changed, 6 insertions(+), 13 deletions(-)

diff --git a/pyccel/ast/cudaext.py b/pyccel/ast/cudaext.py
index fa63357876..90ffc5341e 100644
--- a/pyccel/ast/cudaext.py
+++ b/pyccel/ast/cudaext.py
@@ -34,6 +34,9 @@ class CudaNewarray(PyccelFunction):
 
     Parameters
     ----------
+    *args : tuple of TypedAstNode
+        The arguments of the superclass PyccelFunction.
+
     class_type : NumpyNDArrayType
         The type of the new array.
 
@@ -56,7 +59,7 @@ def init_dtype(self):
         """
         return self._init_dtype
 
-    def __init__(self, *arg,class_type, init_dtype, memory_location):
+    def __init__(self, *arg ,class_type, init_dtype, memory_location):
         self._class_type = class_type
         self._init_dtype = init_dtype
         self._memory_location = memory_location
@@ -107,18 +110,8 @@ def __init__(self, shape, fill_value, dtype, order, memory_location):
         class_type = CudaArrayType(dtype, rank, order, memory_location)
         super().__init__(fill_value, class_type = class_type, init_dtype = init_dtype, memory_location = memory_location)
 
-class CudaAutoFill(CudaFull):
-    """
-    Abstract class for all classes which inherit from CudaFull.
-
-    Abstract class for all classes which inherit from CudaFull.
-    """
-    __slots__ = ()
-    name = 'auto_fill'
-    def __init__(self, shape, dtype, order, memory_location):
-        super().__init__(shape, Nil(), dtype, order, memory_location = memory_location)
 
-class CudaHostEmpty(CudaAutoFill):
+class CudaHostEmpty(CudaFull):
     """
     Represents a call to  Cuda.host_empty for code generation.
 
@@ -139,7 +132,7 @@ class CudaHostEmpty(CudaAutoFill):
     name = 'empty'
     def __init__(self, shape, dtype='float', order='C'):
         memory_location = 'host'
-        super().__init__(shape, dtype, order , memory_location)
+        super().__init__(shape, Nil(), dtype, order , memory_location)
     @property
     def fill_value(self):
         """

From e47373cfa021b24cbf9f98457d004b10c707539c Mon Sep 17 00:00:00 2001
From: smazouz42 <smazouz@student.1337.ma>
Date: Wed, 17 Jul 2024 14:50:22 +0100
Subject: [PATCH 056/130] fix a doc problem

---
 pyccel/ast/cudaext.py   | 8 ++++----
 pyccel/ast/cudatypes.py | 2 ++
 pyccel/ast/variable.py  | 1 -
 3 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/pyccel/ast/cudaext.py b/pyccel/ast/cudaext.py
index 90ffc5341e..8bf790b968 100644
--- a/pyccel/ast/cudaext.py
+++ b/pyccel/ast/cudaext.py
@@ -54,17 +54,17 @@ def init_dtype(self):
         """
         The dtype provided to the function when it was initialised in Python.
 
-        The dtype provided to the function when it was initialised in Python.
-        If no dtype was provided then this should equal `None`.
+        The dtype provided to the function when it was initialised in Python
+        if no dtype was provided then this should equal `None`.
         """
         return self._init_dtype
 
-    def __init__(self, *arg ,class_type, init_dtype, memory_location):
+    def __init__(self, *args ,class_type, init_dtype, memory_location):
         self._class_type = class_type
         self._init_dtype = init_dtype
         self._memory_location = memory_location
 
-        super().__init__(*arg)
+        super().__init__(*args)
 
 class CudaFull(CudaNewarray):
     """
diff --git a/pyccel/ast/cudatypes.py b/pyccel/ast/cudatypes.py
index 872a87f284..b8b063a116 100644
--- a/pyccel/ast/cudatypes.py
+++ b/pyccel/ast/cudatypes.py
@@ -21,6 +21,8 @@ class CudaArrayType(HomogeneousContainerType, metaclass = ArgumentSingleton):
 
     Class representing the Cuda array type
 
+    Parameters
+    ----------
     dtype : NumpyNumericType | PythonNativeBool | GenericType
         The internal datatype of the object (GenericType is allowed for external
         libraries, e.g. MPI).
diff --git a/pyccel/ast/variable.py b/pyccel/ast/variable.py
index c8b9fd95ef..051cf631b7 100644
--- a/pyccel/ast/variable.py
+++ b/pyccel/ast/variable.py
@@ -109,7 +109,6 @@ def __init__(
         name,
         *,
         memory_handling='stack',
-        memory_location='host',
         is_const=False,
         is_target=False,
         is_optional=False,

From eee620207740cbc87561ba37baff42d4105b0ba2 Mon Sep 17 00:00:00 2001
From: smazouz42 <smazouz@student.1337.ma>
Date: Wed, 17 Jul 2024 15:03:40 +0100
Subject: [PATCH 057/130] fix a doc problem

---
 pyccel/ast/cudaext.py   | 4 ++--
 pyccel/ast/cudatypes.py | 8 +++++---
 2 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/pyccel/ast/cudaext.py b/pyccel/ast/cudaext.py
index 8bf790b968..99efd2c4ed 100644
--- a/pyccel/ast/cudaext.py
+++ b/pyccel/ast/cudaext.py
@@ -54,8 +54,8 @@ def init_dtype(self):
         """
         The dtype provided to the function when it was initialised in Python.
 
-        The dtype provided to the function when it was initialised in Python
-        if no dtype was provided then this should equal `None`.
+        The dtype provided to the function when it was initialised in Python.
+        If no dtype was provided then this should equal `None`.
         """
         return self._init_dtype
 
diff --git a/pyccel/ast/cudatypes.py b/pyccel/ast/cudatypes.py
index b8b063a116..70c1fc06c8 100644
--- a/pyccel/ast/cudatypes.py
+++ b/pyccel/ast/cudatypes.py
@@ -9,10 +9,12 @@
 from functools import lru_cache
 import numpy as np
 
-from .datatypes import FixedSizeNumericType, HomogeneousContainerType
 from pyccel.utilities.metaclasses import ArgumentSingleton
-from .datatypes import pyccel_type_to_original_type, original_type_to_pyccel_type
-from .numpytypes import NumpyNDArrayType
+
+from .datatypes     import FixedSizeNumericType, HomogeneousContainerType
+from .datatypes     import pyccel_type_to_original_type, original_type_to_pyccel_type
+
+from .numpytypes    import NumpyNDArrayType
 
 
 class CudaArrayType(HomogeneousContainerType, metaclass = ArgumentSingleton):

From afd05c1b329687d7e8e454256417baae2c98d8c3 Mon Sep 17 00:00:00 2001
From: smazouz42 <smazouz@student.1337.ma>
Date: Wed, 17 Jul 2024 16:00:08 +0100
Subject: [PATCH 058/130] make sure tests are running successfully

---
 pyccel/cuda/__init__.py                   |  3 ++-
 pyccel/cuda/cuda_arrays.py                | 29 +++++++++++++++++++++++
 tests/pyccel/scripts/kernel/host_array.py |  2 +-
 tests/pyccel/test_pyccel.py               |  6 +++--
 4 files changed, 36 insertions(+), 4 deletions(-)
 create mode 100644 pyccel/cuda/cuda_arrays.py

diff --git a/pyccel/cuda/__init__.py b/pyccel/cuda/__init__.py
index e8542ad5d5..ae4be32387 100644
--- a/pyccel/cuda/__init__.py
+++ b/pyccel/cuda/__init__.py
@@ -6,5 +6,6 @@
     This module is for exposing the CudaSubmodule functions.
 """
 from .cuda_sync_primitives    import synchronize
+from .cuda_arrays             import host_empty
 
-__all__ = ['synchronize']
+__all__ = ['synchronize', 'host_empty']
diff --git a/pyccel/cuda/cuda_arrays.py b/pyccel/cuda/cuda_arrays.py
new file mode 100644
index 0000000000..19be596a54
--- /dev/null
+++ b/pyccel/cuda/cuda_arrays.py
@@ -0,0 +1,29 @@
+#------------------------------------------------------------------------------------------#
+# This file is part of Pyccel which is released under MIT License. See the LICENSE file or #
+# go to https://github.com/pyccel/pyccel/blob/master/LICENSE for full license details.     #
+#------------------------------------------------------------------------------------------#
+"""
+This submodule contains cuda_arrays methods for Pyccel.
+"""
+
+def host_empty(shape):
+    """
+    Create an empty array on the host.
+
+    Return an uninitialised NumPy array of the given shape, created with `numpy.empty`.
+
+    Parameters
+    ----------
+    shape : tuple of int or int
+        The shape of the array.
+
+    Returns
+    -------
+    a : array
+        The empty array on the host.
+    """
+    import numpy as np
+    a = np.empty(shape)
+    return a
+
+
diff --git a/tests/pyccel/scripts/kernel/host_array.py b/tests/pyccel/scripts/kernel/host_array.py
index b682e0cbbf..cacbcc1da5 100644
--- a/tests/pyccel/scripts/kernel/host_array.py
+++ b/tests/pyccel/scripts/kernel/host_array.py
@@ -1,7 +1,7 @@
 # pylint: disable=missing-function-docstring, missing-module-docstring
 from  pyccel import cuda
 
-a = cuda.host_empty(10, 'int')
+a = cuda.host_empty(10)
 
 for i in range(10):
     a[i] = 1
diff --git a/tests/pyccel/test_pyccel.py b/tests/pyccel/test_pyccel.py
index e3d98ef3ef..9b0f0d443e 100644
--- a/tests/pyccel/test_pyccel.py
+++ b/tests/pyccel/test_pyccel.py
@@ -744,11 +744,13 @@ def test_kernel_collision(gpu_available):
 
 #------------------------------------------------------------------------------
 @pytest.mark.cuda
-def test_device_array(gpu_available):
-    types = str
+def test_host_array(gpu_available):
+    types = float
     pyccel_test("scripts/kernel/host_array.py",
             language="cuda", output_dtype=types, execute_code=gpu_available)
 
+#------------------------------------------------------------------------------
+
 @pytest.mark.cuda
 def test_device_call(gpu_available):
     types = str

From ff950290b25ab60f2ba36e378bfe4d583e0d9a54 Mon Sep 17 00:00:00 2001
From: smazouz42 <smazouz@student.1337.ma>
Date: Wed, 17 Jul 2024 16:20:48 +0100
Subject: [PATCH 059/130] fix a doc problem

---
 .dict_custom.txt           | 1 +
 pyccel/cuda/cuda_arrays.py | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/.dict_custom.txt b/.dict_custom.txt
index 5d99e21194..8981c8bbef 100644
--- a/.dict_custom.txt
+++ b/.dict_custom.txt
@@ -111,6 +111,7 @@ variadic
 subclasses
 oneAPI
 Cuda
+cuda
 getter
 setter
 bitwise
diff --git a/pyccel/cuda/cuda_arrays.py b/pyccel/cuda/cuda_arrays.py
index 19be596a54..cbdf938c0c 100644
--- a/pyccel/cuda/cuda_arrays.py
+++ b/pyccel/cuda/cuda_arrays.py
@@ -19,7 +19,7 @@ def host_empty(shape):
 
     Returns
     -------
-    a : array
+    array
         The empty array on the host.
     """
     import numpy as np

From 21f70c042aed9f0dde6643d889784f711c6aa0de Mon Sep 17 00:00:00 2001
From: EmilyBourne <louise.bourne@gmail.com>
Date: Mon, 11 Mar 2024 11:41:27 +0100
Subject: [PATCH 060/130] Trigger tests on push to devel or main branch

---
 .github/workflows/anaconda_linux.yml   | 2 +-
 .github/workflows/anaconda_windows.yml | 2 +-
 .github/workflows/intel.yml            | 2 +-
 .github/workflows/linux.yml            | 2 +-
 .github/workflows/macosx.yml           | 2 +-
 .github/workflows/pickle.yml           | 2 +-
 .github/workflows/pickle_wheel.yml     | 2 +-
 .github/workflows/windows.yml          | 2 +-
 8 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/.github/workflows/anaconda_linux.yml b/.github/workflows/anaconda_linux.yml
index 5a5384e5ce..525903a54f 100644
--- a/.github/workflows/anaconda_linux.yml
+++ b/.github/workflows/anaconda_linux.yml
@@ -28,7 +28,7 @@ env:
 jobs:
   Python_version_picker:
     runs-on: ubuntu-latest
-    if: github.event_name != 'push' || github.repository == 'pyccel/pyccel'
+    if: github.event_name != 'push' || github.repository == 'pyccel/pyccel-cuda'
     outputs:
       python_version: ${{ steps.set-python_version.outputs.python_version }}
     steps:
diff --git a/.github/workflows/anaconda_windows.yml b/.github/workflows/anaconda_windows.yml
index 154a4d01e8..0f3f8a04ed 100644
--- a/.github/workflows/anaconda_windows.yml
+++ b/.github/workflows/anaconda_windows.yml
@@ -28,7 +28,7 @@ env:
 jobs:
   Python_version_picker:
     runs-on: windows-latest
-    if: github.event_name != 'push' || github.repository == 'pyccel/pyccel'
+    if: github.event_name != 'push' || github.repository == 'pyccel/pyccel-cuda'
     outputs:
       python_version: ${{ steps.set-python_version.outputs.python_version }}
     steps:
diff --git a/.github/workflows/intel.yml b/.github/workflows/intel.yml
index 977d5f9afd..5f340e1088 100644
--- a/.github/workflows/intel.yml
+++ b/.github/workflows/intel.yml
@@ -29,7 +29,7 @@ env:
 jobs:
   Python_version_picker:
     runs-on: ubuntu-latest
-    if: github.event_name != 'push' || github.repository == 'pyccel/pyccel'
+    if: github.event_name != 'push' || github.repository == 'pyccel/pyccel-cuda'
     outputs:
       python_version: ${{ steps.set-python_version.outputs.python_version }}
     steps:
diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml
index ad39cee725..664ae3aa60 100644
--- a/.github/workflows/linux.yml
+++ b/.github/workflows/linux.yml
@@ -28,7 +28,7 @@ env:
 jobs:
   matrix_prep:
     runs-on: ubuntu-latest
-    if: github.event_name != 'push' || github.repository == 'pyccel/pyccel'
+    if: github.event_name != 'push' || github.repository == 'pyccel/pyccel-cuda'
     outputs:
       matrix: ${{ steps.set-matrix.outputs.matrix }}
     steps:
diff --git a/.github/workflows/macosx.yml b/.github/workflows/macosx.yml
index 4768a64efa..f51041c0b8 100644
--- a/.github/workflows/macosx.yml
+++ b/.github/workflows/macosx.yml
@@ -28,7 +28,7 @@ env:
 jobs:
   Python_version_picker:
     runs-on: macos-latest
-    if: github.event_name != 'push' || github.repository == 'pyccel/pyccel'
+    if: github.event_name != 'push' || github.repository == 'pyccel/pyccel-cuda'
     outputs:
       python_version: ${{ steps.set-python_version.outputs.python_version }}
     steps:
diff --git a/.github/workflows/pickle.yml b/.github/workflows/pickle.yml
index 052028a5cb..cc3864afd2 100644
--- a/.github/workflows/pickle.yml
+++ b/.github/workflows/pickle.yml
@@ -31,7 +31,7 @@ env:
 jobs:
   Python_version_picker:
     runs-on: ubuntu-latest
-    if: github.event_name != 'push' || github.repository == 'pyccel/pyccel'
+    if: github.event_name != 'push' || github.repository == 'pyccel/pyccel-cuda'
     outputs:
       python_version: ${{ steps.set-matrix.outputs.python_version }}
       matrix: ${{ steps.set-matrix.outputs.matrix }}
diff --git a/.github/workflows/pickle_wheel.yml b/.github/workflows/pickle_wheel.yml
index 1dc82af503..718dc13dcc 100644
--- a/.github/workflows/pickle_wheel.yml
+++ b/.github/workflows/pickle_wheel.yml
@@ -28,7 +28,7 @@ env:
 jobs:
   Python_version_picker:
     runs-on: ubuntu-latest
-    if: github.event_name != 'push' || github.repository == 'pyccel/pyccel'
+    if: github.event_name != 'push' || github.repository == 'pyccel/pyccel-cuda'
     outputs:
       python_version: ${{ steps.set-python_version.outputs.python_version }}
     steps:
diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml
index 60c560ffee..827038a279 100644
--- a/.github/workflows/windows.yml
+++ b/.github/workflows/windows.yml
@@ -28,7 +28,7 @@ env:
 jobs:
   Python_version_picker:
     runs-on: windows-latest
-    if: github.event_name != 'push' || github.repository == 'pyccel/pyccel'
+    if: github.event_name != 'push' || github.repository == 'pyccel/pyccel-cuda'
     outputs:
       python_version: ${{ steps.set-python_version.outputs.python_version }}
     steps:

From 13efc4e19c17cb0a3ee213ffdedd7290110aca65 Mon Sep 17 00:00:00 2001
From: EmilyBourne <louise.bourne@gmail.com>
Date: Mon, 11 Mar 2024 11:46:33 +0100
Subject: [PATCH 061/130] Add cuda workflow to test cuda developments on CI

---
 .github/actions/coverage_install/action.yml |  2 +-
 .github/actions/linux_install/action.yml    | 10 +--
 .github/actions/pytest_run/action.yml       |  4 +-
 .github/actions/pytest_run_cuda/action.yml  | 17 +++++
 .github/actions/python_install/action.yml   | 17 +++++
 .github/workflows/cuda.yml                  | 83 +++++++++++++++++++++
 ci_tools/bot_messages/show_tests.txt        |  1 +
 ci_tools/bot_tools/bot_funcs.py             | 12 +--
 ci_tools/devel_branch_tests.py              |  1 +
 ci_tools/json_pytest_output.py              |  2 +-
 10 files changed, 135 insertions(+), 14 deletions(-)
 create mode 100644 .github/actions/pytest_run_cuda/action.yml
 create mode 100644 .github/actions/python_install/action.yml
 create mode 100644 .github/workflows/cuda.yml

diff --git a/.github/actions/coverage_install/action.yml b/.github/actions/coverage_install/action.yml
index ac5294e542..5732baee34 100644
--- a/.github/actions/coverage_install/action.yml
+++ b/.github/actions/coverage_install/action.yml
@@ -15,7 +15,7 @@ runs:
     - name: Directory Creation
       run: |
         INSTALL_DIR=$(cd tests; python -c "import pyccel; print(pyccel.__path__[0])")
-        SITE_DIR=$(python -c 'import sysconfig; print(sysconfig.get_paths()["purelib"])')
+        SITE_DIR=$(dirname ${INSTALL_DIR})
         echo -e "import coverage; coverage.process_startup()" > ${SITE_DIR}/pyccel_cov.pth
         echo -e "[run]\nparallel = True\nsource = ${INSTALL_DIR}\ndata_file = $(pwd)/.coverage\n[report]\ninclude = ${INSTALL_DIR}/*\n[xml]\noutput = cobertura.xml" > .coveragerc
         echo "SITE_DIR=${SITE_DIR}" >> $GITHUB_ENV
diff --git a/.github/actions/linux_install/action.yml b/.github/actions/linux_install/action.yml
index 8fb5cd8505..0ef9a69b8e 100644
--- a/.github/actions/linux_install/action.yml
+++ b/.github/actions/linux_install/action.yml
@@ -9,22 +9,22 @@ runs:
       shell: bash
     - name: Install fortran
       run:
-        sudo apt-get install gfortran
+        sudo apt-get install -y gfortran
       shell: bash
     - name: Install LaPack
       run:
-        sudo apt-get install libblas-dev liblapack-dev
+        sudo apt-get install -y libblas-dev liblapack-dev
       shell: bash
     - name: Install MPI
       run: |
-        sudo apt-get install libopenmpi-dev openmpi-bin
+        sudo apt-get install -y libopenmpi-dev openmpi-bin
         echo "MPI_OPTS=--oversubscribe" >> $GITHUB_ENV
       shell: bash
     - name: Install OpenMP
       run:
-        sudo apt-get install libomp-dev libomp5
+        sudo apt-get install -y libomp-dev libomp5
       shell: bash
     - name: Install Valgrind
       run:
-        sudo apt-get install valgrind
+        sudo apt-get install -y valgrind
       shell: bash
diff --git a/.github/actions/pytest_run/action.yml b/.github/actions/pytest_run/action.yml
index 0b6f0f988d..b0bdc31f16 100644
--- a/.github/actions/pytest_run/action.yml
+++ b/.github/actions/pytest_run/action.yml
@@ -51,13 +51,13 @@ runs:
       working-directory: ./tests
       id: pytest_3
     - name: Test Fortran translations
-      run: python -m pytest -n auto -rX ${FLAGS} -m "not (parallel or xdist_incompatible) and not (c or python) ${{ inputs.pytest_mark }}" --ignore=ndarrays 2>&1 | tee s4_outfile.out
+      run: python -m pytest -n auto -rX ${FLAGS} -m "not (parallel or xdist_incompatible) and not (c or python or ccuda) ${{ inputs.pytest_mark }}" --ignore=ndarrays 2>&1 | tee s4_outfile.out
       shell: ${{ inputs.shell_cmd }}
       working-directory: ./tests
       id: pytest_4
     - name: Test multi-file Fortran translations
       run: |
-        python -m pytest -rX ${FLAGS} -m "xdist_incompatible and not parallel and not (c or python) ${{ inputs.pytest_mark }}" --ignore=ndarrays 2>&1 | tee s5_outfile.out
+        python -m pytest -rX ${FLAGS} -m "xdist_incompatible and not parallel and not (c or python or ccuda) ${{ inputs.pytest_mark }}" --ignore=ndarrays 2>&1 | tee s5_outfile.out
         pyccel-clean
       shell: ${{ inputs.shell_cmd }}
       working-directory: ./tests
diff --git a/.github/actions/pytest_run_cuda/action.yml b/.github/actions/pytest_run_cuda/action.yml
new file mode 100644
index 0000000000..52092a6e02
--- /dev/null
+++ b/.github/actions/pytest_run_cuda/action.yml
@@ -0,0 +1,17 @@
+name: 'Pyccel pytest commands generating Ccuda'
+inputs:
+  shell_cmd:
+    description: 'Specifies the shell command (different for anaconda)'
+    required: false
+    default: "bash"
+
+runs:
+  using: "composite"
+  steps:
+    - name: Ccuda tests with pytest
+      run: |
+        # Catch exit 5 (no tests found)
+        sh -c 'python -m pytest -n auto -rx -m "not (parallel or xdist_incompatible) and ccuda" --ignore=symbolic --ignore=ndarrays; ret=$?; [ $ret = 5 ] && exit 0 || exit $ret'
+        pyccel-clean
+      shell: ${{ inputs.shell_cmd }}
+      working-directory: ./tests
diff --git a/.github/actions/python_install/action.yml b/.github/actions/python_install/action.yml
new file mode 100644
index 0000000000..f9b720e3e1
--- /dev/null
+++ b/.github/actions/python_install/action.yml
@@ -0,0 +1,17 @@
+name: 'Python installation commands'
+
+runs:
+  using: "composite"
+  steps:
+    - name: Install python
+      run:
+        sudo apt-get -y install python3-dev
+      shell: bash
+    - name: python as python3
+      run:
+        sudo apt-get -y install python-is-python3
+      shell: bash
+    - name: Install Pip
+      run:
+        sudo apt-get -y install python3-pip
+      shell: bash
diff --git a/.github/workflows/cuda.yml b/.github/workflows/cuda.yml
new file mode 100644
index 0000000000..833ebf5d85
--- /dev/null
+++ b/.github/workflows/cuda.yml
@@ -0,0 +1,83 @@
+name: Cuda unit tests
+
+on:
+  workflow_dispatch:
+    inputs:
+      python_version:
+        required: false
+        type: string
+      ref:
+        required: false
+        type: string
+      check_run_id:
+        required: false
+        type: string
+      pr_repo:
+        required: false
+        type: string
+  push:
+    branches: [devel, main]
+
+env:
+  COMMIT: ${{ inputs.ref || github.event.ref }}
+  PEM: ${{ secrets.BOT_PEM }}
+  GITHUB_RUN_ID: ${{ github.run_id }}
+  GITHUB_CHECK_RUN_ID: ${{ inputs.check_run_id }}
+  PR_REPO: ${{ inputs.pr_repo || github.repository }}
+
+jobs:
+  Cuda:
+
+    runs-on: ubuntu-20.04
+    name: Unit tests
+
+    container: nvidia/cuda:11.7.1-devel-ubuntu20.04
+    steps:
+      - uses: actions/checkout@v3
+        with:
+          ref: ${{ env.COMMIT }}
+          repository: ${{ env.PR_REPO }}
+      - name: Prepare docker
+        run: |
+          apt update && apt install sudo
+          TZ=Europe/France
+          ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
+          DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends tzdata
+        shell: bash
+      - name: Install python (setup-python action doesn't work with containers)
+        uses: ./.github/actions/python_install
+      - name: "Setup"
+        id: token
+        run: |
+          pip install jwt requests
+          python ci_tools/setup_check_run.py cuda
+      - name: CUDA Version
+        run: nvcc --version # cuda install check
+      - name: Install dependencies
+        uses: ./.github/actions/linux_install
+      - name: Install Pyccel with tests
+        run: |
+            PATH=${PATH}:$HOME/.local/bin
+            echo "PATH=${PATH}" >> $GITHUB_ENV
+            python -m pip install --upgrade pip
+            python -m pip install --user .[test]
+        shell: bash
+      - name: Coverage install
+        uses: ./.github/actions/coverage_install
+      - name: Ccuda tests with pytest
+        id: cuda_pytest
+        uses: ./.github/actions/pytest_run_cuda
+      - name: Collect coverage information
+        continue-on-error: True
+        uses: ./.github/actions/coverage_collection
+      - name: Save code coverage report
+        uses: actions/upload-artifact@v3
+        with:
+          name: coverage-artifact
+          path: .coverage
+          retention-days: 1
+      - name: "Post completed"
+        if: always()
+        run:
+          python ci_tools/complete_check_run.py ${{ steps.cuda_pytest.outcome }}
+
diff --git a/ci_tools/bot_messages/show_tests.txt b/ci_tools/bot_messages/show_tests.txt
index adc07e8431..eb15492d2e 100644
--- a/ci_tools/bot_messages/show_tests.txt
+++ b/ci_tools/bot_messages/show_tests.txt
@@ -2,6 +2,7 @@ The following is a list of keywords which can be used to run tests. Tests in bol
 - **linux** : Runs the unit tests on a Linux system.
 - **windows** : Runs the unit tests on a Windows system.
 - **macosx** : Runs the unit tests on a MacOS X system.
+- **cuda** : Runs the cuda unit tests on a Linux system.
 - **coverage** : Runs the unit tests on a Linux system and checks the coverage of the tests.
 - **docs** : Checks if the documentation follows the numpydoc format.
 - **pylint** : Runs pylint on files which are too big to be handled by codacy.
diff --git a/ci_tools/bot_tools/bot_funcs.py b/ci_tools/bot_tools/bot_funcs.py
index 7084a01bb9..1621d1d089 100644
--- a/ci_tools/bot_tools/bot_funcs.py
+++ b/ci_tools/bot_tools/bot_funcs.py
@@ -23,7 +23,8 @@
         'pyccel_lint': '3.8',
         'pylint': '3.8',
         'spelling': '3.8',
-        'windows': '3.8'
+        'windows': '3.8',
+        'cuda': '-'
         }
 
 test_names = {
@@ -40,15 +41,16 @@
         'pyccel_lint': "Pyccel best practices",
         'pylint': "Python linting",
         'spelling': "Spelling verification",
-        'windows': "Unit tests on Windows"
+        'windows': "Unit tests on Windows",
+        'cuda': "Unit tests on Linux with cuda"
         }
 
-test_dependencies = {'coverage':['linux']}
+test_dependencies = {'coverage':['linux', 'cuda']}
 
 tests_with_base = ('coverage', 'docs', 'pyccel_lint', 'pylint')
 
 pr_test_keys = ('linux', 'windows', 'macosx', 'coverage', 'docs', 'pylint',
-                'pyccel_lint', 'spelling')
+                'pyccel_lint', 'spelling', 'cuda')
 
 review_stage_labels = ["needs_initial_review", "Ready_for_review", "Ready_to_merge"]
 
@@ -420,7 +422,7 @@ def is_test_required(self, commit_log, name, key, state):
             True if the test should be run, False otherwise.
         """
         print("Checking : ", name, key)
-        if key in ('linux', 'windows', 'macosx', 'anaconda_linux', 'anaconda_windows', 'intel'):
+        if key in ('linux', 'windows', 'macosx', 'anaconda_linux', 'anaconda_windows', 'intel', 'cuda'):
             has_relevant_change = lambda diff: any((f.startswith('pyccel/') or f.startswith('tests/')) #pylint: disable=unnecessary-lambda-assignment
                                                     and f.endswith('.py') and f != 'pyccel/version.py'
                                                     for f in diff)
diff --git a/ci_tools/devel_branch_tests.py b/ci_tools/devel_branch_tests.py
index 1102ef9e92..ec67b6c49a 100644
--- a/ci_tools/devel_branch_tests.py
+++ b/ci_tools/devel_branch_tests.py
@@ -15,3 +15,4 @@
     bot.run_tests(['anaconda_linux'], '3.10', force_run = True)
     bot.run_tests(['anaconda_windows'], '3.10', force_run = True)
     bot.run_tests(['intel'], '3.9', force_run = True)
+    bot.run_tests(['cuda'], '-', force_run = True)
diff --git a/ci_tools/json_pytest_output.py b/ci_tools/json_pytest_output.py
index 409ae76d72..b84f4a4c09 100644
--- a/ci_tools/json_pytest_output.py
+++ b/ci_tools/json_pytest_output.py
@@ -61,7 +61,7 @@ def     mini_md_summary(title, outcome, failed_tests):
     summary = ""
 
     failed_pattern = re.compile(r".*FAILED.*")
-    languages = ('c', 'fortran', 'python')
+    languages = ('c', 'fortran', 'python', 'cuda')
     pattern = {lang: re.compile(r".*\["+lang+r"\]\ \_.*") for lang in languages}
 
     for i in p_args.tests:

From e07587d8cec95efcb0bcb53bda468a549c4d941c Mon Sep 17 00:00:00 2001
From: EmilyBourne <louise.bourne@gmail.com>
Date: Mon, 11 Mar 2024 11:41:27 +0100
Subject: [PATCH 062/130] Trigger tests on push to devel or main branch

---
 .github/workflows/deploy.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml
index 9111b47d52..cf52b1c624 100644
--- a/.github/workflows/deploy.yml
+++ b/.github/workflows/deploy.yml
@@ -10,7 +10,7 @@ jobs:
   waitForWorklows:
     name: Wait for workflows
     runs-on: ubuntu-latest
-    if: github.event.workflow_run.head_branch == 'main'
+    if: github.event.workflow_run.head_branch == 'main' && github.repository == 'pyccel/pyccel'
     steps:
       - name: Checkout repository
         uses: actions/checkout@v4

From 94b13575c2baad7d8c2d6bcce0e4c443716c0b47 Mon Sep 17 00:00:00 2001
From: bauom <40796259+bauom@users.noreply.github.com>
Date: Wed, 28 Feb 2024 18:11:50 +0100
Subject: [PATCH 063/130] [init] Adding CUDA language/compiler and CodePrinter
 (#32)

This PR makes the generated C code compilable with nvcc. It adds `cuda` as a target language, together with a `CudaCodePrinter`.

Changes to stdlib:

Expressions using complex types are wrapped in `#ifndef __NVCC__` guards so that they are not processed by the nvcc compiler.

---------

Co-authored-by: Mouad Elalj, EmilyBourne
---
 .dict_custom.txt                           |   1 +
 .github/actions/pytest_parallel/action.yml |   4 +-
 .github/actions/pytest_run/action.yml      |   4 +-
 .github/actions/pytest_run_cuda/action.yml |  11 +-
 CHANGELOG.md                               |   6 +
 pyccel/codegen/codegen.py                  |   8 +-
 pyccel/codegen/compiling/compilers.py      |   5 +-
 pyccel/codegen/pipeline.py                 |   5 +-
 pyccel/codegen/printing/cucode.py          |  74 +++++++++++
 pyccel/commands/console.py                 |   2 +-
 pyccel/compilers/default_compilers.py      |  13 +-
 pyccel/naming/__init__.py                  |   4 +-
 pyccel/naming/cudanameclashchecker.py      |  92 ++++++++++++++
 pyccel/stdlib/numpy/numpy_c.c              |   2 +
 pyccel/stdlib/numpy/numpy_c.h              |   2 +
 pytest.ini                                 |   1 +
 tests/conftest.py                          |  11 ++
 tests/epyccel/test_base.py                 | 136 ++++++++++-----------
 18 files changed, 298 insertions(+), 83 deletions(-)
 create mode 100644 pyccel/codegen/printing/cucode.py
 create mode 100644 pyccel/naming/cudanameclashchecker.py

diff --git a/.dict_custom.txt b/.dict_custom.txt
index b9240f6215..161337d33b 100644
--- a/.dict_custom.txt
+++ b/.dict_custom.txt
@@ -110,6 +110,7 @@ Valgrind
 variadic
 subclasses
 oneAPI
+Cuda
 getter
 setter
 bitwise
diff --git a/.github/actions/pytest_parallel/action.yml b/.github/actions/pytest_parallel/action.yml
index c7c77d99c7..f91d84915b 100644
--- a/.github/actions/pytest_parallel/action.yml
+++ b/.github/actions/pytest_parallel/action.yml
@@ -10,8 +10,8 @@ runs:
   steps:
     - name: Test with pytest
       run: |
-        mpiexec -n 4 ${MPI_OPTS} python -m pytest epyccel/test_parallel_epyccel.py -v -m parallel -rXx
-        #mpiexec -n 4 ${MPI_OPTS} python -m pytest epyccel -v -m parallel -rXx
+        mpiexec -n 4 ${MPI_OPTS} python -m pytest epyccel/test_parallel_epyccel.py -v -m "parallel and not cuda" -rXx
+        #mpiexec -n 4 ${MPI_OPTS} python -m pytest epyccel -v -m "parallel and not cuda" -rXx
       shell: ${{ inputs.shell_cmd }}
       working-directory: ./tests
 
diff --git a/.github/actions/pytest_run/action.yml b/.github/actions/pytest_run/action.yml
index b0bdc31f16..451fa39e92 100644
--- a/.github/actions/pytest_run/action.yml
+++ b/.github/actions/pytest_run/action.yml
@@ -51,13 +51,13 @@ runs:
       working-directory: ./tests
       id: pytest_3
     - name: Test Fortran translations
-      run: python -m pytest -n auto -rX ${FLAGS} -m "not (parallel or xdist_incompatible) and not (c or python or ccuda) ${{ inputs.pytest_mark }}" --ignore=ndarrays 2>&1 | tee s4_outfile.out
+      run: python -m pytest -n auto -rX ${FLAGS} -m "not (parallel or xdist_incompatible) and not (c or python or cuda) ${{ inputs.pytest_mark }}" --ignore=ndarrays 2>&1 | tee s4_outfile.out
       shell: ${{ inputs.shell_cmd }}
       working-directory: ./tests
       id: pytest_4
     - name: Test multi-file Fortran translations
       run: |
-        python -m pytest -rX ${FLAGS} -m "xdist_incompatible and not parallel and not (c or python or ccuda) ${{ inputs.pytest_mark }}" --ignore=ndarrays 2>&1 | tee s5_outfile.out
+        python -m pytest -rX ${FLAGS} -m "xdist_incompatible and not parallel and not (c or python or cuda) ${{ inputs.pytest_mark }}" --ignore=ndarrays 2>&1 | tee s5_outfile.out
         pyccel-clean
       shell: ${{ inputs.shell_cmd }}
       working-directory: ./tests
diff --git a/.github/actions/pytest_run_cuda/action.yml b/.github/actions/pytest_run_cuda/action.yml
index 52092a6e02..46f90552ed 100644
--- a/.github/actions/pytest_run_cuda/action.yml
+++ b/.github/actions/pytest_run_cuda/action.yml
@@ -1,4 +1,4 @@
-name: 'Pyccel pytest commands generating Ccuda'
+name: 'Pyccel pytest commands generating Cuda'
 inputs:
   shell_cmd:
     description: 'Specifies the shell command (different for anaconda)'
@@ -11,7 +11,14 @@ runs:
     - name: Ccuda tests with pytest
       run: |
         # Catch exit 5 (no tests found)
-        sh -c 'python -m pytest -n auto -rx -m "not (parallel or xdist_incompatible) and ccuda" --ignore=symbolic --ignore=ndarrays; ret=$?; [ $ret = 5 ] && exit 0 || exit $ret'
+        python -m pytest -rX ${FLAGS} -m "not (xdist_incompatible or parallel) and cuda ${{ inputs.pytest_mark }}" --ignore=symbolic --ignore=ndarrays 2>&1 | tee s1_outfile.out
         pyccel-clean
       shell: ${{ inputs.shell_cmd }}
       working-directory: ./tests
+    - name: Final step
+      if: always()
+      id: status
+      run:
+        python ci_tools/json_pytest_output.py -t "Cuda Test Summary" --tests "Cuda tests:${{ steps.pytest_1.outcome }}:tests/s1_outfile.out"
+                      
+      shell: ${{ inputs.shell_cmd }}
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 60e982aa70..a7048916d6 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,6 +1,12 @@
 # Change Log
 All notable changes to this project will be documented in this file.
 
+## \[Cuda - UNRELEASED\]
+
+### Added
+
+-   #32 : add support for `nvcc` Compiler and `cuda` language as a possible option.
+
 ## \[UNRELEASED\]
 
 ### Added
diff --git a/pyccel/codegen/codegen.py b/pyccel/codegen/codegen.py
index a7a02d7804..33721a48e8 100644
--- a/pyccel/codegen/codegen.py
+++ b/pyccel/codegen/codegen.py
@@ -9,16 +9,18 @@
 from pyccel.codegen.printing.fcode  import FCodePrinter
 from pyccel.codegen.printing.ccode  import CCodePrinter
 from pyccel.codegen.printing.pycode import PythonCodePrinter
+from pyccel.codegen.printing.cucode import CudaCodePrinter
 
 from pyccel.ast.core      import FunctionDef, Interface, ModuleHeader
 from pyccel.utilities.stage import PyccelStage
 
-_extension_registry = {'fortran': 'f90', 'c':'c',  'python':'py'}
-_header_extension_registry = {'fortran': None, 'c':'h',  'python':None}
+_extension_registry = {'fortran': 'f90', 'c':'c',  'python':'py', 'cuda':'cu'}
+_header_extension_registry = {'fortran': None, 'c':'h',  'python':None, 'cuda':'h'}
 printer_registry    = {
                         'fortran':FCodePrinter,
                         'c':CCodePrinter,
-                        'python':PythonCodePrinter
+                        'python':PythonCodePrinter,
+                        'cuda':CudaCodePrinter
                       }
 
 pyccel_stage = PyccelStage()
diff --git a/pyccel/codegen/compiling/compilers.py b/pyccel/codegen/compiling/compilers.py
index c866ee5b1a..d909a5036e 100644
--- a/pyccel/codegen/compiling/compilers.py
+++ b/pyccel/codegen/compiling/compilers.py
@@ -444,7 +444,10 @@ def compile_shared_library(self, compile_obj, output_folder, verbose = False, sh
         # Collect compile information
         exec_cmd, includes, libs_flags, libdirs_flags, m_code = \
                 self._get_compile_components(compile_obj, accelerators)
-        linker_libdirs_flags = ['-Wl,-rpath' if l == '-L' else l for l in libdirs_flags]
+        if self._info['exec'] == 'nvcc':
+            linker_libdirs_flags = ['-Xcompiler' if l == '-L' else f'"-Wl,-rpath,{l}"' for l in libdirs_flags]
+        else:
+            linker_libdirs_flags = ['-Wl,-rpath' if l == '-L' else l for l in libdirs_flags]
 
         flags.insert(0,"-shared")
 
diff --git a/pyccel/codegen/pipeline.py b/pyccel/codegen/pipeline.py
index 14087fb567..eb357fab74 100644
--- a/pyccel/codegen/pipeline.py
+++ b/pyccel/codegen/pipeline.py
@@ -180,9 +180,10 @@ def handle_error(stage):
     if language is None:
         language = 'fortran'
 
-    # Choose Fortran compiler
+    # Choose Default compiler
     if compiler is None:
-        compiler = os.environ.get('PYCCEL_DEFAULT_COMPILER', 'GNU')
+        default_compiler_family = 'nvidia' if language == 'cuda' else 'GNU'
+        compiler = os.environ.get('PYCCEL_DEFAULT_COMPILER', default_compiler_family)
 
     fflags = [] if fflags is None else fflags.split()
     wrapper_flags = [] if wrapper_flags is None else wrapper_flags.split()
diff --git a/pyccel/codegen/printing/cucode.py b/pyccel/codegen/printing/cucode.py
new file mode 100644
index 0000000000..86146b065b
--- /dev/null
+++ b/pyccel/codegen/printing/cucode.py
@@ -0,0 +1,74 @@
+# coding: utf-8
+#------------------------------------------------------------------------------------------#
+# This file is part of Pyccel which is released under MIT License. See the LICENSE file or #
+# go to https://github.com/pyccel/pyccel/blob/master/LICENSE for full license details.     #
+#------------------------------------------------------------------------------------------#
+"""
+Provide tools for generating and handling CUDA code.
+This module is designed to interface Pyccel's Abstract Syntax Tree (AST) with CUDA,
+enabling the direct translation of high-level Pyccel expressions into CUDA code.
+"""
+
+from pyccel.codegen.printing.ccode import CCodePrinter, c_library_headers
+
+from pyccel.ast.core        import Import, Module
+
+from pyccel.errors.errors   import Errors
+
+
+errors = Errors()
+
+__all__ = ["CudaCodePrinter"]
+
+class CudaCodePrinter(CCodePrinter):
+    """
+    Print code in CUDA format.
+
+    This printer converts Pyccel's Abstract Syntax Tree (AST) into strings of CUDA code.
+    Navigation through this file utilizes _print_X functions,
+    as is common with all printers.
+
+    Parameters
+    ----------
+    filename : str
+            The name of the file being pyccelised.
+    prefix_module : str
+            A prefix to be added to the name of the module.
+    """
+    language = "cuda"
+
+    def __init__(self, filename, prefix_module = None):
+
+        errors.set_target(filename)
+
+        super().__init__(filename)
+
+    def _print_Module(self, expr):
+        self.set_scope(expr.scope)
+        self._current_module = expr.name
+        body = ''.join(self._print(i) for i in expr.body)
+
+        global_variables = ''.join(self._print(d) for d in expr.declarations)
+
+        # Print imports last to be sure that all additional_imports have been collected
+        imports = [Import(expr.name, Module(expr.name,(),())), *self._additional_imports.values()]
+        c_headers_imports = ''
+        local_imports = ''
+
+        for imp in imports:
+            if imp.source in c_library_headers:
+                c_headers_imports += self._print(imp)
+            else:
+                local_imports += self._print(imp)
+
+        imports = f'{c_headers_imports}\
+                    extern "C"{{\n\
+                    {local_imports}\
+                    }}'
+
+        code = f'{imports}\n\
+                 {global_variables}\n\
+                 {body}\n'
+
+        self.exit_scope()
+        return code
diff --git a/pyccel/commands/console.py b/pyccel/commands/console.py
index 596c440ec0..fcbec009de 100644
--- a/pyccel/commands/console.py
+++ b/pyccel/commands/console.py
@@ -80,7 +80,7 @@ def pyccel(files=None, mpi=None, openmp=None, openacc=None, output_dir=None, com
     # ... backend compiler options
     group = parser.add_argument_group('Backend compiler options')
 
-    group.add_argument('--language', choices=('fortran', 'c', 'python'), help='Generated language')
+    group.add_argument('--language', choices=('fortran', 'c', 'python', 'cuda'), help='Generated language')
 
     group.add_argument('--compiler', help='Compiler family or json file containing a compiler description {GNU,intel,PGI}')
 
diff --git a/pyccel/compilers/default_compilers.py b/pyccel/compilers/default_compilers.py
index 166085d22e..d47856773c 100644
--- a/pyccel/compilers/default_compilers.py
+++ b/pyccel/compilers/default_compilers.py
@@ -185,6 +185,15 @@
                 },
             'family': 'nvidia',
             }
+#------------------------------------------------------------
+nvcc_info = {'exec'         : 'nvcc',
+             'language'     : 'cuda',
+             'debug_flags'  : ("-g",),
+             'release_flags': ("-O3",),
+             'general_flags': ('--compiler-options', '-fPIC',),
+             'family'       : 'nvidia'
+            }
+
 
 #------------------------------------------------------------
 def change_to_lib_flag(lib):
@@ -288,6 +297,7 @@ def change_to_lib_flag(lib):
 pgfortran_info.update(python_info)
 nvc_info.update(python_info)
 nvfort_info.update(python_info)
+nvcc_info.update(python_info)
 
 available_compilers = {('GNU', 'c') : gcc_info,
                        ('GNU', 'fortran') : gfort_info,
@@ -296,6 +306,7 @@ def change_to_lib_flag(lib):
                        ('PGI', 'c') : pgcc_info,
                        ('PGI', 'fortran') : pgfortran_info,
                        ('nvidia', 'c') : nvc_info,
-                       ('nvidia', 'fortran') : nvfort_info}
+                       ('nvidia', 'fortran') : nvfort_info,
+                       ('nvidia', 'cuda'): nvcc_info}
 
 vendors = ('GNU','intel','PGI','nvidia')
diff --git a/pyccel/naming/__init__.py b/pyccel/naming/__init__.py
index 72c318d3ad..b3e4bbbe0e 100644
--- a/pyccel/naming/__init__.py
+++ b/pyccel/naming/__init__.py
@@ -10,7 +10,9 @@
 from .fortrannameclashchecker import FortranNameClashChecker
 from .cnameclashchecker import CNameClashChecker
 from .pythonnameclashchecker import PythonNameClashChecker
+from .cudanameclashchecker import CudaNameClashChecker
 
 name_clash_checkers = {'fortran':FortranNameClashChecker(),
         'c':CNameClashChecker(),
-        'python':PythonNameClashChecker()}
+        'python':PythonNameClashChecker(),
+        'cuda':CudaNameClashChecker()}
diff --git a/pyccel/naming/cudanameclashchecker.py b/pyccel/naming/cudanameclashchecker.py
new file mode 100644
index 0000000000..971204e912
--- /dev/null
+++ b/pyccel/naming/cudanameclashchecker.py
@@ -0,0 +1,92 @@
+# coding: utf-8
+#------------------------------------------------------------------------------------------#
+# This file is part of Pyccel which is released under MIT License. See the LICENSE file or #
+# go to https://github.com/pyccel/pyccel/blob/master/LICENSE for full license details.     #
+#------------------------------------------------------------------------------------------#
+"""
+Handles name clash problems in Cuda
+"""
+from .languagenameclashchecker import LanguageNameClashChecker
+
+class CudaNameClashChecker(LanguageNameClashChecker):
+    """
+    Class containing functions to help avoid problematic names in Cuda.
+
+    A class which provides functionalities to check or propose variable names and
+    verify that they do not cause name clashes. Name clashes may be due to
+    new variables, or due to the use of reserved keywords.
+    """
+    # Keywords as mentioned on https://en.cppreference.com/w/c/keyword
+    keywords = set(['isign', 'fsign', 'csign', 'auto', 'break', 'case', 'char', 'const',
+        'continue', 'default', 'do', 'double', 'else', 'enum',
+        'extern', 'float', 'for', 'goto', 'if', 'inline', 'int',
+        'long', 'register', 'restrict', 'return', 'short', 'signed',
+        'sizeof', 'static', 'struct', 'switch', 'typedef', 'union',
+        'unsigned', 'void', 'volatile', 'whie', '_Alignas',
+        '_Alignof', '_Atomic', '_Bool', '_Complex', 'Decimal128',
+        '_Decimal32', '_Decimal64', '_Generic', '_Imaginary',
+        '_Noreturn', '_Static_assert', '_Thread_local', 't_ndarray',
+        'array_create', 'new_slice', 'array_slicing', 'alias_assign',
+        'transpose_alias_assign', 'array_fill', 't_slice',
+        'GET_INDEX_EXP1', 'GET_INDEX_EXP2', 'GET_INDEX_EXP2',
+        'GET_INDEX_EXP3', 'GET_INDEX_EXP4', 'GET_INDEX_EXP5',
+        'GET_INDEX_EXP6', 'GET_INDEX_EXP7', 'GET_INDEX_EXP8',
+        'GET_INDEX_EXP9', 'GET_INDEX_EXP10', 'GET_INDEX_EXP11',
+        'GET_INDEX_EXP12', 'GET_INDEX_EXP13', 'GET_INDEX_EXP14',
+        'GET_INDEX_EXP15', 'NUM_ARGS_H1', 'NUM_ARGS',
+        'GET_INDEX_FUNC_H2', 'GET_INDEX_FUNC', 'GET_INDEX',
+        'INDEX', 'GET_ELEMENT', 'free_array', 'free_pointer',
+        'get_index', 'numpy_to_ndarray_strides',
+        'numpy_to_ndarray_shape', 'get_size', 'order_f', 'order_c', 'array_copy_data'])
+
+    def has_clash(self, name, symbols):
+        """
+        Indicate whether the proposed name causes any clashes.
+
+        Checks if a suggested name conflicts with predefined
+        keywords or specified symbols, returning true for a clash.
+        This method is crucial for maintaining namespace integrity and
+        preventing naming conflicts in code generation processes.
+
+        Parameters
+        ----------
+        name : str
+            The suggested name.
+        symbols : set
+            Symbols which should be considered as collisions.
+
+        Returns
+        -------
+        bool
+            True if the name is a collision.
+            False if the name is collision free.
+        """
+        return any(name == k for k in self.keywords) or \
+               any(name == s for s in symbols)
+
+    def get_collisionless_name(self, name, symbols):
+        """
+        Get a valid name which doesn't collide with symbols or Cuda keywords.
+
+        Find a new name based on the suggested name which will not cause
+        conflicts with Cuda keywords, does not appear in the provided symbols,
+        and is a valid name in Cuda code.
+
+        Parameters
+        ----------
+        name : str
+            The suggested name.
+        symbols : set
+            Symbols which should be considered as collisions.
+
+        Returns
+        -------
+        str
+            A new name which is collision free.
+        """
+        if len(name)>4 and all(name[i] == '_' for i in (0,1,-1,-2)):
+            # Ignore magic methods
+            return name
+        if name[0] == '_':
+            name = 'private'+name
+        return self._get_collisionless_name(name, symbols)
diff --git a/pyccel/stdlib/numpy/numpy_c.c b/pyccel/stdlib/numpy/numpy_c.c
index 7c9ecbbf6b..bc56214772 100644
--- a/pyccel/stdlib/numpy/numpy_c.c
+++ b/pyccel/stdlib/numpy/numpy_c.c
@@ -17,8 +17,10 @@ double  fsign(double x)
     return SIGN(x);
 }
 
+#ifndef __NVCC__
 /* numpy.sign for complex */
 double complex csign(double complex x)
 {
     return x ? ((!creal(x) && cimag(x) < 0) || (creal(x) < 0) ? -1 : 1) : 0;
 }
+#endif
diff --git a/pyccel/stdlib/numpy/numpy_c.h b/pyccel/stdlib/numpy/numpy_c.h
index e72cf3ad57..c2a16a5516 100644
--- a/pyccel/stdlib/numpy/numpy_c.h
+++ b/pyccel/stdlib/numpy/numpy_c.h
@@ -15,6 +15,8 @@
 
 long long int isign(long long int x);
 double fsign(double x);
+#ifndef __NVCC__
 double complex csign(double complex x);
+#endif
 
 #endif
diff --git a/pytest.ini b/pytest.ini
index 42eb0d72ba..3792ab65f9 100644
--- a/pytest.ini
+++ b/pytest.ini
@@ -9,3 +9,4 @@ markers =
     python: test to generate python code
     xdist_incompatible: test which compiles a file also compiled by another test
     external: test using an external dll (problematic with conda on Windows)
+    cuda: test to generate cuda code
diff --git a/tests/conftest.py b/tests/conftest.py
index 79144b6978..a5082ef6e8 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -21,6 +21,17 @@
 def language(request):
     return request.param
 
+@pytest.fixture( params=[
+        pytest.param("fortran", marks = pytest.mark.fortran),
+        pytest.param("c", marks = pytest.mark.c),
+        pytest.param("python", marks = pytest.mark.python),
+        pytest.param("cuda", marks = pytest.mark.cuda)
+    ],
+    scope = "session"
+)
+def language_with_cuda(request):
+    return request.param
+
 def move_coverage(path_dir):
     for root, _, files in os.walk(path_dir):
         for name in files:
diff --git a/tests/epyccel/test_base.py b/tests/epyccel/test_base.py
index c22064d321..413f79eef1 100644
--- a/tests/epyccel/test_base.py
+++ b/tests/epyccel/test_base.py
@@ -7,128 +7,128 @@
 from utilities import epyccel_test
 
 
-def test_is_false(language):
-    test = epyccel_test(base.is_false, lang=language)
+def test_is_false(language_with_cuda):
+    test = epyccel_test(base.is_false, lang=language_with_cuda)
     test.compare_epyccel( True )
     test.compare_epyccel( False )
 
-def test_is_true(language):
-    test = epyccel_test(base.is_true, lang=language)
+def test_is_true(language_with_cuda):
+    test = epyccel_test(base.is_true, lang=language_with_cuda)
     test.compare_epyccel( True )
     test.compare_epyccel( False )
 
-def test_compare_is(language):
-    test = epyccel_test(base.compare_is, lang=language)
+def test_compare_is(language_with_cuda):
+    test = epyccel_test(base.compare_is, lang=language_with_cuda)
     test.compare_epyccel( True, True )
     test.compare_epyccel( True, False )
     test.compare_epyccel( False, True )
     test.compare_epyccel( False, False )
 
-def test_compare_is_not(language):
-    test = epyccel_test(base.compare_is_not, lang=language)
+def test_compare_is_not(language_with_cuda):
+    test = epyccel_test(base.compare_is_not, lang=language_with_cuda)
     test.compare_epyccel( True, True )
     test.compare_epyccel( True, False )
     test.compare_epyccel( False, True )
     test.compare_epyccel( False, False )
 
-def test_compare_is_int(language):
-    test = epyccel_test(base.compare_is_int, lang=language)
+def test_compare_is_int(language_with_cuda):
+    test = epyccel_test(base.compare_is_int, lang=language_with_cuda)
     test.compare_epyccel( True, 1 )
     test.compare_epyccel( True, 0 )
     test.compare_epyccel( False, 1 )
     test.compare_epyccel( False, 0 )
 
-def test_compare_is_not_int(language):
-    test = epyccel_test(base.compare_is_not_int, lang=language)
+def test_compare_is_not_int(language_with_cuda):
+    test = epyccel_test(base.compare_is_not_int, lang=language_with_cuda)
     test.compare_epyccel( True, 1 )
     test.compare_epyccel( True, 0 )
     test.compare_epyccel( False, 1 )
     test.compare_epyccel( False, 0 )
 
-def test_not_false(language):
-    test = epyccel_test(base.not_false, lang=language)
+def test_not_false(language_with_cuda):
+    test = epyccel_test(base.not_false, lang=language_with_cuda)
     test.compare_epyccel( True )
     test.compare_epyccel( False )
 
-def test_not_true(language):
-    test = epyccel_test(base.not_true, lang=language)
+def test_not_true(language_with_cuda):
+    test = epyccel_test(base.not_true, lang=language_with_cuda)
     test.compare_epyccel( True )
     test.compare_epyccel( False )
 
-def test_eq_false(language):
-    test = epyccel_test(base.eq_false, lang=language)
+def test_eq_false(language_with_cuda):
+    test = epyccel_test(base.eq_false, lang=language_with_cuda)
     test.compare_epyccel( True )
     test.compare_epyccel( False )
 
-def test_eq_true(language):
-    test = epyccel_test(base.eq_true, lang=language)
+def test_eq_true(language_with_cuda):
+    test = epyccel_test(base.eq_true, lang=language_with_cuda)
     test.compare_epyccel( True )
     test.compare_epyccel( False )
 
-def test_neq_false(language):
-    test = epyccel_test(base.eq_false, lang=language)
+def test_neq_false(language_with_cuda):
+    test = epyccel_test(base.eq_false, lang=language_with_cuda)
     test.compare_epyccel( True )
     test.compare_epyccel( False )
 
-def test_neq_true(language):
-    test = epyccel_test(base.eq_true, lang=language)
+def test_neq_true(language_with_cuda):
+    test = epyccel_test(base.eq_true, lang=language_with_cuda)
     test.compare_epyccel( True )
     test.compare_epyccel( False )
 
-def test_not(language):
-    test = epyccel_test(base.not_val, lang=language)
+def test_not(language_with_cuda):
+    test = epyccel_test(base.not_val, lang=language_with_cuda)
     test.compare_epyccel( True )
     test.compare_epyccel( False )
 
-def test_not_int(language):
-    test = epyccel_test(base.not_int, lang=language)
+def test_not_int(language_with_cuda):
+    test = epyccel_test(base.not_int, lang=language_with_cuda)
     test.compare_epyccel( 0 )
     test.compare_epyccel( 4 )
 
-def test_compare_is_nil(language):
-    test = epyccel_test(base.is_nil, lang=language)
+def test_compare_is_nil(language_with_cuda):
+    test = epyccel_test(base.is_nil, lang=language_with_cuda)
     test.compare_epyccel( None )
 
-def test_compare_is_not_nil(language):
-    test = epyccel_test(base.is_not_nil, lang=language)
+def test_compare_is_not_nil(language_with_cuda):
+    test = epyccel_test(base.is_not_nil, lang=language_with_cuda)
     test.compare_epyccel( None )
 
-def test_cast_int(language):
-    test = epyccel_test(base.cast_int, lang=language)
+def test_cast_int(language_with_cuda):
+    test = epyccel_test(base.cast_int, lang=language_with_cuda)
     test.compare_epyccel( 4 )
-    test = epyccel_test(base.cast_float_to_int, lang=language)
+    test = epyccel_test(base.cast_float_to_int, lang=language_with_cuda)
     test.compare_epyccel( 4.5 )
 
-def test_cast_bool(language):
-    test = epyccel_test(base.cast_bool, lang=language)
+def test_cast_bool(language_with_cuda):
+    test = epyccel_test(base.cast_bool, lang=language_with_cuda)
     test.compare_epyccel( True )
 
-def test_cast_float(language):
-    test = epyccel_test(base.cast_float, lang=language)
+def test_cast_float(language_with_cuda):
+    test = epyccel_test(base.cast_float, lang=language_with_cuda)
     test.compare_epyccel( 4.5 )
-    test = epyccel_test(base.cast_int_to_float, lang=language)
+    test = epyccel_test(base.cast_int_to_float, lang=language_with_cuda)
     test.compare_epyccel( 4 )
 
-def test_if_0_int(language):
-    test = epyccel_test(base.if_0_int, lang=language)
+def test_if_0_int(language_with_cuda):
+    test = epyccel_test(base.if_0_int, lang=language_with_cuda)
     test.compare_epyccel( 22 )
     test.compare_epyccel( 0 )
 
-def test_if_0_real(language):
-    test = epyccel_test(base.if_0_real, lang=language)
+def test_if_0_real(language_with_cuda):
+    test = epyccel_test(base.if_0_real, lang=language_with_cuda)
     test.compare_epyccel( 22.3 )
     test.compare_epyccel( 0.0 )
 
-def test_same_int(language):
-    test = epyccel_test(base.is_same_int, lang=language)
+def test_same_int(language_with_cuda):
+    test = epyccel_test(base.is_same_int, lang=language_with_cuda)
     test.compare_epyccel( 22 )
-    test = epyccel_test(base.isnot_same_int, lang=language)
+    test = epyccel_test(base.isnot_same_int, lang=language_with_cuda)
     test.compare_epyccel( 22 )
 
-def test_same_float(language):
-    test = epyccel_test(base.is_same_float, lang=language)
+def test_same_float(language_with_cuda):
+    test = epyccel_test(base.is_same_float, lang=language_with_cuda)
     test.compare_epyccel( 22.2 )
-    test = epyccel_test(base.isnot_same_float, lang=language)
+    test = epyccel_test(base.isnot_same_float, lang=language_with_cuda)
     test.compare_epyccel( 22.2 )
 
 @pytest.mark.parametrize( 'language', [
@@ -150,28 +150,28 @@ def test_same_complex(language):
     test = epyccel_test(base.isnot_same_complex, lang=language)
     test.compare_epyccel( complex(2,3) )
 
-def test_is_types(language):
-    test = epyccel_test(base.is_types, lang=language)
+def test_is_types(language_with_cuda):
+    test = epyccel_test(base.is_types, lang=language_with_cuda)
     test.compare_epyccel( 1, 1.0 )
 
-def test_isnot_types(language):
-    test = epyccel_test(base.isnot_types, lang=language)
+def test_isnot_types(language_with_cuda):
+    test = epyccel_test(base.isnot_types, lang=language_with_cuda)
     test.compare_epyccel( 1, 1.0 )
 
-def test_none_is_none(language):
-    test = epyccel_test(base.none_is_none, lang=language)
+def test_none_is_none(language_with_cuda):
+    test = epyccel_test(base.none_is_none, lang=language_with_cuda)
     test.compare_epyccel()
 
-def test_none_isnot_none(language):
-    test = epyccel_test(base.none_isnot_none, lang=language)
+def test_none_isnot_none(language_with_cuda):
+    test = epyccel_test(base.none_isnot_none, lang=language_with_cuda)
     test.compare_epyccel()
 
-def test_pass_if(language):
-    test = epyccel_test(base.pass_if, lang=language)
+def test_pass_if(language_with_cuda):
+    test = epyccel_test(base.pass_if, lang=language_with_cuda)
     test.compare_epyccel(2)
 
-def test_pass2_if(language):
-    test = epyccel_test(base.pass2_if, lang=language)
+def test_pass2_if(language_with_cuda):
+    test = epyccel_test(base.pass2_if, lang=language_with_cuda)
     test.compare_epyccel(0.2)
     test.compare_epyccel(0.0)
 
@@ -192,15 +192,15 @@ def test_use_optional(language):
     test.compare_epyccel()
     test.compare_epyccel(6)
 
-def test_none_equality(language):
-    test = epyccel_test(base.none_equality, lang=language)
+def test_none_equality(language_with_cuda):
+    test = epyccel_test(base.none_equality, lang=language_with_cuda)
     test.compare_epyccel()
     test.compare_epyccel(6)
 
-def test_none_none_equality(language):
-    test = epyccel_test(base.none_none_equality, lang=language)
+def test_none_none_equality(language_with_cuda):
+    test = epyccel_test(base.none_none_equality, lang=language_with_cuda)
     test.compare_epyccel()
 
-def test_none_literal_equality(language):
-    test = epyccel_test(base.none_literal_equality, lang=language)
+def test_none_literal_equality(language_with_cuda):
+    test = epyccel_test(base.none_literal_equality, lang=language_with_cuda)
     test.compare_epyccel()

From c0006dd94302e9e4781ca960e67832a91b0868ca Mon Sep 17 00:00:00 2001
From: Said Mazouz <95222894+smazouz42@users.noreply.github.com>
Date: Wed, 15 May 2024 12:58:50 +0100
Subject: [PATCH 064/130] Fix import handling (#49)

This pull request fixes https://github.com/pyccel/pyccel-cuda/issues/48 by implementing a small wrapper for CUDA and by wrapping only the non-CUDA (C) functions in `extern "C"`.

**Commit Summary**

-    Implemented new header printer for CUDA.
-    Added CUDA wrapper assignment
-    Instead of wrapping all local headers, wrap only C functions with extern 'C'

---------

Co-authored-by: EmilyBourne <louise.bourne@gmail.com>
Co-authored-by: bauom <40796259+bauom@users.noreply.github.com>
---
 CHANGELOG.md                                |  3 +-
 pyccel/codegen/printing/cucode.py           | 45 ++++++++----
 pyccel/codegen/python_wrapper.py            |  4 ++
 pyccel/codegen/wrapper/cuda_to_c_wrapper.py | 78 +++++++++++++++++++++
 tests/epyccel/modules/cuda_module.py        | 13 ++++
 tests/epyccel/test_epyccel_modules.py       | 13 ++++
 6 files changed, 142 insertions(+), 14 deletions(-)
 create mode 100644 pyccel/codegen/wrapper/cuda_to_c_wrapper.py
 create mode 100644 tests/epyccel/modules/cuda_module.py

diff --git a/CHANGELOG.md b/CHANGELOG.md
index a7048916d6..8885e66107 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,7 +5,8 @@ All notable changes to this project will be documented in this file.
 
 ### Added
 
--   #32 : add support for `nvcc` Compiler and `cuda` language as a possible option.
+-   #32 : Add support for `nvcc` Compiler and `cuda` language as a possible option.
+-   #48 : Fix incorrect handling of imports in `cuda`.
 
 ## \[UNRELEASED\]
 
diff --git a/pyccel/codegen/printing/cucode.py b/pyccel/codegen/printing/cucode.py
index 86146b065b..277d2a3a6a 100644
--- a/pyccel/codegen/printing/cucode.py
+++ b/pyccel/codegen/printing/cucode.py
@@ -52,19 +52,7 @@ def _print_Module(self, expr):
 
         # Print imports last to be sure that all additional_imports have been collected
         imports = [Import(expr.name, Module(expr.name,(),())), *self._additional_imports.values()]
-        c_headers_imports = ''
-        local_imports = ''
-
-        for imp in imports:
-            if imp.source in c_library_headers:
-                c_headers_imports += self._print(imp)
-            else:
-                local_imports += self._print(imp)
-
-        imports = f'{c_headers_imports}\
-                    extern "C"{{\n\
-                    {local_imports}\
-                    }}'
+        imports = ''.join(self._print(i) for i in imports)
 
         code = f'{imports}\n\
                  {global_variables}\n\
@@ -72,3 +60,34 @@ def _print_Module(self, expr):
 
         self.exit_scope()
         return code
+
+    def _print_ModuleHeader(self, expr):
+        self.set_scope(expr.module.scope)
+        self._in_header = True
+        name = expr.module.name
+
+        funcs = ""
+        cuda_headers = ""
+        for f in expr.module.funcs:
+            if not f.is_inline:
+                if 'kernel' in f.decorators:  # Checking for 'kernel' decorator
+                    cuda_headers += self.function_signature(f) + ';\n'
+                else:
+                    funcs += self.function_signature(f) + ';\n'
+        global_variables = ''.join('extern '+self._print(d) for d in expr.module.declarations if not d.variable.is_private)
+        # Print imports last to be sure that all additional_imports have been collected
+        imports = [*expr.module.imports, *self._additional_imports.values()]
+        imports = ''.join(self._print(i) for i in imports)
+
+        self._in_header = False
+        self.exit_scope()
+        function_declaration = f'{cuda_headers}\n\
+                    extern "C"{{\n\
+                    {funcs}\
+                    }}\n'
+        return '\n'.join((f"#ifndef {name.upper()}_H",
+                          f"#define {name.upper()}_H",
+                          global_variables,
+                          function_declaration,
+                          "#endif // {name.upper()}_H\n"))
+
diff --git a/pyccel/codegen/python_wrapper.py b/pyccel/codegen/python_wrapper.py
index 9437727042..62c303fa64 100644
--- a/pyccel/codegen/python_wrapper.py
+++ b/pyccel/codegen/python_wrapper.py
@@ -13,6 +13,7 @@
 from pyccel.codegen.printing.fcode               import FCodePrinter
 from pyccel.codegen.wrapper.fortran_to_c_wrapper import FortranToCWrapper
 from pyccel.codegen.wrapper.c_to_python_wrapper  import CToPythonWrapper
+from pyccel.codegen.wrapper.cuda_to_c_wrapper    import CudaToCWrapper
 from pyccel.codegen.utilities                    import recompile_object
 from pyccel.codegen.utilities                    import copy_internal_library
 from pyccel.codegen.utilities                    import internal_libs
@@ -144,6 +145,9 @@ def create_shared_library(codegen,
                 verbose=verbose)
         timings['Bind C wrapping'] = time.time() - start_bind_c_compiling
         c_ast = bind_c_mod
+    elif language == 'cuda':
+        wrapper = CudaToCWrapper()
+        c_ast = wrapper.wrap(codegen.ast)
     else:
         c_ast = codegen.ast
 
diff --git a/pyccel/codegen/wrapper/cuda_to_c_wrapper.py b/pyccel/codegen/wrapper/cuda_to_c_wrapper.py
new file mode 100644
index 0000000000..c0e24c7c09
--- /dev/null
+++ b/pyccel/codegen/wrapper/cuda_to_c_wrapper.py
@@ -0,0 +1,78 @@
+# coding: utf-8
+#------------------------------------------------------------------------------------------#
+# This file is part of Pyccel which is released under MIT License. See the LICENSE file or #
+# go to https://github.com/pyccel/pyccel/blob/master/LICENSE for full license details.     #
+#------------------------------------------------------------------------------------------#
+"""
+Module describing the code-wrapping class : CudaToCWrapper
+which creates an interface exposing Cuda code to C.
+"""
+
+from pyccel.ast.bind_c      import BindCModule
+from pyccel.errors.errors   import Errors
+from pyccel.ast.bind_c      import BindCVariable
+from .wrapper               import Wrapper
+
+errors = Errors()
+
+class CudaToCWrapper(Wrapper):
+    """
+    Class for creating a wrapper exposing Cuda code to C.
+
+    While CUDA is typically compatible with C by default,
+    this wrapper becomes necessary in scenarios where specific adaptations
+    or modifications are required to ensure seamless integration with C.
+    """
+
+    def _wrap_Module(self, expr):
+        """
+        Create a Module which is compatible with C.
+
+        Create a Module which provides an interface between C and the
+        Module described by expr.
+
+        Parameters
+        ----------
+        expr : pyccel.ast.core.Module
+            The module to be wrapped.
+
+        Returns
+        -------
+        pyccel.ast.core.BindCModule
+            The C-compatible module.
+        """
+        init_func = expr.init_func
+        if expr.interfaces:
+            errors.report("Interface wrapping is not yet supported for Cuda",
+                      severity='warning', symbol=expr)
+        if expr.classes:
+            errors.report("Class wrapping is not yet supported for Cuda",
+                      severity='warning', symbol=expr)
+
+        variables = [self._wrap(v) for v in expr.variables]
+
+        return BindCModule(expr.name, variables, expr.funcs,
+                init_func=init_func,
+                scope = expr.scope,
+                original_module=expr)
+
+    def _wrap_Variable(self, expr):
+        """
+        Create all objects necessary to expose a module variable to C.
+
+        Create and return the objects which must be printed in the wrapping
+        module in order to expose the variable to C
+
+        Parameters
+        ----------
+        expr : pyccel.ast.variables.Variable
+            The module variable.
+
+        Returns
+        -------
+        pyccel.ast.core.BindCVariable
+            The C-compatible variable, which must be printed in
+            the wrapping module to expose the variable.
+        """
+        return expr.clone(expr.name, new_class = BindCVariable)
+
diff --git a/tests/epyccel/modules/cuda_module.py b/tests/epyccel/modules/cuda_module.py
new file mode 100644
index 0000000000..bb7ae6b98a
--- /dev/null
+++ b/tests/epyccel/modules/cuda_module.py
@@ -0,0 +1,13 @@
+# pylint: disable=missing-function-docstring, missing-module-docstring
+import numpy as np
+
+g = np.float64(9.81)
+r0 = np.float32(1.0)
+rmin = 0.01
+rmax = 1.0
+
+skip_centre = True
+
+method = 3
+
+tiny = np.int32(4)
diff --git a/tests/epyccel/test_epyccel_modules.py b/tests/epyccel/test_epyccel_modules.py
index ad8ae0bd75..223f741bf0 100644
--- a/tests/epyccel/test_epyccel_modules.py
+++ b/tests/epyccel/test_epyccel_modules.py
@@ -200,3 +200,16 @@ def test_awkward_names(language):
     assert mod.function() == modnew.function()
     assert mod.pure() == modnew.pure()
     assert mod.allocate(1) == modnew.allocate(1)
+
+def test_cuda_module(language_with_cuda):
+    import modules.cuda_module as mod
+
+    modnew = epyccel(mod, language=language_with_cuda)
+
+    atts = ('g', 'r0', 'rmin', 'rmax', 'skip_centre',
+            'method', 'tiny')
+    for att in atts:
+        mod_att = getattr(mod, att)
+        modnew_att = getattr(modnew, att)
+        assert mod_att == modnew_att
+        assert type(mod_att) is type(modnew_att)

From 1145d9151318787947ae30e2775ddfd243ddaeb3 Mon Sep 17 00:00:00 2001
From: Said Mazouz <95222894+smazouz42@users.noreply.github.com>
Date: Thu, 27 Jun 2024 20:31:46 +0100
Subject: [PATCH 065/130] Add support for kernels (#42)

This pull request addresses issue #28 by implementing a new feature in
Pyccel that allows users to define custom GPU kernels. The syntax for
creating these kernels is inspired by Numba. Issue #45 also needs to be
fixed for testing purposes.

**Commit Summary**

- Introduced KernelCall class
- Added cuda printer methods _print_KernelCall and _print_FunctionDef to
generate the corresponding CUDA representation for both kernel calls and
definitions
- Added IndexedFunctionCall, which represents an indexed function call
- Added CUDA module and cuda.synchronize()
- Fixing a bug that I found in the header: it does not import the
necessary header for the used function

---------

Co-authored-by: EmilyBourne <louise.bourne@gmail.com>
Co-authored-by: bauom <40796259+bauom@users.noreply.github.com>
Co-authored-by: Emily Bourne <emily.bourne@epfl.ch>
---
 .dict_custom.txt                              |   1 +
 CHANGELOG.md                                  |   2 +
 docs/cuda.md                                  |  23 +++
 pyccel/ast/core.py                            |  37 ++++
 pyccel/ast/cuda.py                            |  65 +++++++
 pyccel/ast/cudaext.py                         |  42 +++++
 pyccel/ast/utilities.py                       |   4 +-
 pyccel/codegen/printing/cucode.py             |  46 ++++-
 pyccel/cuda/__init__.py                       |  10 +
 pyccel/cuda/cuda_sync_primitives.py           |  16 ++
 pyccel/decorators.py                          |  32 ++++
 pyccel/errors/messages.py                     |   8 +
 pyccel/parser/semantic.py                     |  84 ++++++++-
 pyccel/parser/syntactic.py                    |   4 +
 tests/conftest.py                             |   9 +
 tests/cuda/test_kernel_semantic.py            | 176 ++++++++++++++++++
 tests/pyccel/scripts/kernel/hello_kernel.py   |  19 ++
 .../scripts/kernel/kernel_name_collision.py   |   8 +
 tests/pyccel/test_pyccel.py                   |  22 ++-
 19 files changed, 599 insertions(+), 9 deletions(-)
 create mode 100644 docs/cuda.md
 create mode 100644 pyccel/ast/cuda.py
 create mode 100644 pyccel/ast/cudaext.py
 create mode 100644 pyccel/cuda/__init__.py
 create mode 100644 pyccel/cuda/cuda_sync_primitives.py
 create mode 100644 tests/cuda/test_kernel_semantic.py
 create mode 100644 tests/pyccel/scripts/kernel/hello_kernel.py
 create mode 100644 tests/pyccel/scripts/kernel/kernel_name_collision.py

diff --git a/.dict_custom.txt b/.dict_custom.txt
index 161337d33b..6ddf80b1ff 100644
--- a/.dict_custom.txt
+++ b/.dict_custom.txt
@@ -120,3 +120,4 @@ indexable
 traceback
 STC
 gFTL
+GPUs
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 8885e66107..10bec59084 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,8 @@ All notable changes to this project will be documented in this file.
 
 -   #32 : Add support for `nvcc` Compiler and `cuda` language as a possible option.
 -   #48 : Fix incorrect handling of imports in `cuda`.
+-   #42 : Add support for custom kernel in`cuda`.
+-   #42 : Add Cuda module to Pyccel. Add support for `cuda.synchronize` function.
 
 ## \[UNRELEASED\]
 
diff --git a/docs/cuda.md b/docs/cuda.md
new file mode 100644
index 0000000000..de30d52b80
--- /dev/null
+++ b/docs/cuda.md
@@ -0,0 +1,23 @@
+# Getting started with GPUs
+
+Pyccel now supports NVIDIA CUDA, empowering users to accelerate numerical computations on GPUs seamlessly. With Pyccel's high-level syntax and automatic code generation, harnessing the power of CUDA becomes effortless. This documentation provides a quick guide to enabling CUDA in Pyccel.
+
+## Cuda Decorator
+
+### kernel
+
+The kernel decorator allows the user to declare a CUDA kernel. The kernel can be defined in Python, and the syntax is similar to that of Numba.
+
+```python
+from pyccel.decorators import kernel
+
+@kernel
+def my_kernel():
+    pass
+
+blockspergrid = 1
+threadsperblock = 1
+# Call your kernel function
+my_kernel[blockspergrid, threadsperblock]()
+
+```
\ No newline at end of file
diff --git a/pyccel/ast/core.py b/pyccel/ast/core.py
index 8981ddc160..2758b75be2 100644
--- a/pyccel/ast/core.py
+++ b/pyccel/ast/core.py
@@ -73,6 +73,7 @@
     'If',
     'IfSection',
     'Import',
+    'IndexedFunctionCall',
     'InProgram',
     'InlineFunctionDef',
     'Interface',
@@ -2065,6 +2066,42 @@ def _ignore(cls, c):
         """
         return c is None or isinstance(c, (FunctionDef, *cls._ignored_types))
 
+class IndexedFunctionCall(FunctionCall):
+    """
+    Represents an indexed function call in the code.
+
+    Class representing indexed function calls, encapsulating all
+    relevant information for such calls within the code base.
+
+    Parameters
+    ----------
+    func : FunctionDef
+        The function being called.
+
+    args : iterable of FunctionCallArgument
+        The arguments passed to the function.
+
+    indexes : iterable of TypedAstNode
+        The indexes of the function call.
+
+    current_function : FunctionDef, optional
+        The function where the call takes place.
+    """
+    __slots__ = ('_indexes',)
+    _attribute_nodes = FunctionCall._attribute_nodes + ('_indexes',)
+    def __init__(self, func, args, indexes, current_function = None):
+        self._indexes = indexes
+        super().__init__(func, args, current_function)
+
+    @property
+    def indexes(self):
+        """
+        Indexes of function call.
+
+        Represents the indexes of the function call
+        """
+        return self._indexes
+
 class ConstructorCall(FunctionCall):
 
     """
diff --git a/pyccel/ast/cuda.py b/pyccel/ast/cuda.py
new file mode 100644
index 0000000000..f1e50ef7f0
--- /dev/null
+++ b/pyccel/ast/cuda.py
@@ -0,0 +1,65 @@
+# -*- coding: utf-8 -*-
+#------------------------------------------------------------------------------------------#
+# This file is part of Pyccel which is released under MIT License. See the LICENSE file or #
+# go to https://github.com/pyccel/pyccel/blob/master/LICENSE for full license details.     #
+#------------------------------------------------------------------------------------------#
+"""
+CUDA Module
+This module provides a collection of classes and utilities for CUDA programming.
+"""
+from pyccel.ast.core import FunctionCall
+
+__all__ = (
+    'KernelCall',
+)
+
+class KernelCall(FunctionCall):
+    """
+    Represents a kernel function call in the code.
+
+    The class serves as a representation of a kernel
+    function call within the codebase.
+
+    Parameters
+    ----------
+    func : FunctionDef
+        The definition of the function being called.
+
+    args : iterable of FunctionCallArgument
+        The arguments passed to the function.
+
+    num_blocks : TypedAstNode
+        The number of blocks. These objects must have a primitive type of `PrimitiveIntegerType`.
+
+    tp_block : TypedAstNode
+        The number of threads per block. These objects must have a primitive type of `PrimitiveIntegerType`.
+
+    current_function : FunctionDef, optional
+        The function where the call takes place.
+    """
+    __slots__ = ('_num_blocks','_tp_block')
+    _attribute_nodes = (*FunctionCall._attribute_nodes, '_num_blocks', '_tp_block')
+
+    def __init__(self, func, args, num_blocks, tp_block, current_function = None):
+        self._num_blocks = num_blocks
+        self._tp_block = tp_block
+        super().__init__(func, args, current_function)
+
+    @property
+    def num_blocks(self):
+        """
+        The number of blocks in the kernel being called.
+
+        The number of blocks in the kernel being called.
+        """
+        return self._num_blocks
+
+    @property
+    def tp_block(self):
+        """
+        The number of threads per block.
+
+        The number of threads per block.
+        """
+        return self._tp_block
+
diff --git a/pyccel/ast/cudaext.py b/pyccel/ast/cudaext.py
new file mode 100644
index 0000000000..b540f20993
--- /dev/null
+++ b/pyccel/ast/cudaext.py
@@ -0,0 +1,42 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+#------------------------------------------------------------------------------------------#
+# This file is part of Pyccel which is released under MIT License. See the LICENSE file or #
+# go to https://github.com/pyccel/pyccel/blob/master/LICENSE for full license details.     #
+#------------------------------------------------------------------------------------------#
+"""
+CUDA Extension Module
+Provides CUDA functionality for code generation.
+"""
+from .internals      import PyccelFunction
+
+from .datatypes      import VoidType
+from .core           import Module, PyccelFunctionDef
+
+__all__ = (
+    'CudaSynchronize',
+)
+
+class CudaSynchronize(PyccelFunction):
+    """
+    Represents a call to Cuda.synchronize for code generation.
+
+    This class serves as a representation of the Cuda.synchronize method.
+    """
+    __slots__ = ()
+    _attribute_nodes = ()
+    _shape     = None
+    _class_type = VoidType()
+    def __init__(self):
+        super().__init__()
+
+cuda_funcs = {
+    'synchronize'       : PyccelFunctionDef('synchronize' , CudaSynchronize),
+}
+
+cuda_mod = Module('cuda',
+    variables=[],
+    funcs=cuda_funcs.values(),
+    imports=[]
+)
+
diff --git a/pyccel/ast/utilities.py b/pyccel/ast/utilities.py
index 1e6c0422ab..e5cd77b168 100644
--- a/pyccel/ast/utilities.py
+++ b/pyccel/ast/utilities.py
@@ -25,6 +25,7 @@
 from .literals      import LiteralInteger, LiteralEllipsis, Nil
 from .mathext       import math_mod
 from .sysext        import sys_mod
+from .cudaext       import cuda_mod
 
 from .numpyext      import (NumpyEmpty, NumpyArray, numpy_mod,
                             NumpyTranspose, NumpyLinspace)
@@ -49,7 +50,8 @@
 decorators_mod = Module('decorators',(),
         funcs = [PyccelFunctionDef(d, PyccelFunction) for d in pyccel_decorators.__all__])
 pyccel_mod = Module('pyccel',(),(),
-        imports = [Import('decorators', decorators_mod)])
+        imports = [Import('decorators', decorators_mod),
+                    Import('cuda', cuda_mod)])
 
 # TODO add documentation
 builtin_import_registry = Module('__main__',
diff --git a/pyccel/codegen/printing/cucode.py b/pyccel/codegen/printing/cucode.py
index 277d2a3a6a..cd26843017 100644
--- a/pyccel/codegen/printing/cucode.py
+++ b/pyccel/codegen/printing/cucode.py
@@ -9,11 +9,12 @@
 enabling the direct translation of high-level Pyccel expressions into CUDA code.
 """
 
-from pyccel.codegen.printing.ccode import CCodePrinter, c_library_headers
+from pyccel.codegen.printing.ccode  import CCodePrinter
 
-from pyccel.ast.core        import Import, Module
+from pyccel.ast.core                import Import, Module
+from pyccel.ast.literals            import Nil
 
-from pyccel.errors.errors   import Errors
+from pyccel.errors.errors           import Errors
 
 
 errors = Errors()
@@ -61,6 +62,44 @@ def _print_Module(self, expr):
         self.exit_scope()
         return code
 
+    def function_signature(self, expr, print_arg_names = True):
+        """
+        Get the Cuda representation of the function signature.
+
+        Extract from the function definition `expr` all the
+        information (name, input, output) needed to create the
+        function signature and return a string describing the
+        function.
+        This is not a declaration as the signature does not end
+        with a semi-colon.
+
+        Parameters
+        ----------
+        expr : FunctionDef
+            The function definition for which a signature is needed.
+
+        print_arg_names : bool, default : True
+            Indicates whether argument names should be printed.
+
+        Returns
+        -------
+        str
+            Signature of the function.
+        """
+        cuda_decorater = '__global__' if 'kernel' in expr.decorators else ''
+        c_function_signature = super().function_signature(expr, print_arg_names)
+        return f'{cuda_decorater} {c_function_signature}'
+
+    def _print_KernelCall(self, expr):
+        func = expr.funcdef
+        args = [a.value or Nil() for a in expr.args]
+
+        args = ', '.join(self._print(a) for a in args)
+        return f"{func.name}<<<{expr.num_blocks}, {expr.tp_block}>>>({args});\n"
+
+    def _print_CudaSynchronize(self, expr):
+        return 'cudaDeviceSynchronize();\n'
+
     def _print_ModuleHeader(self, expr):
         self.set_scope(expr.module.scope)
         self._in_header = True
@@ -87,6 +126,7 @@ def _print_ModuleHeader(self, expr):
                     }}\n'
         return '\n'.join((f"#ifndef {name.upper()}_H",
                           f"#define {name.upper()}_H",
+                          imports,
                           global_variables,
                           function_declaration,
                           "#endif // {name.upper()}_H\n"))
diff --git a/pyccel/cuda/__init__.py b/pyccel/cuda/__init__.py
new file mode 100644
index 0000000000..e8542ad5d5
--- /dev/null
+++ b/pyccel/cuda/__init__.py
@@ -0,0 +1,10 @@
+#------------------------------------------------------------------------------------------#
+# This file is part of Pyccel which is released under MIT License. See the LICENSE file or #
+# go to https://github.com/pyccel/pyccel/blob/master/LICENSE for full license details.     #
+#------------------------------------------------------------------------------------------#
+"""
+    This module is for exposing the CudaSubmodule functions.
+"""
+from .cuda_sync_primitives    import synchronize
+
+__all__ = ['synchronize']
diff --git a/pyccel/cuda/cuda_sync_primitives.py b/pyccel/cuda/cuda_sync_primitives.py
new file mode 100644
index 0000000000..f3442fe9e2
--- /dev/null
+++ b/pyccel/cuda/cuda_sync_primitives.py
@@ -0,0 +1,16 @@
+#------------------------------------------------------------------------------------------#
+# This file is part of Pyccel which is released under MIT License. See the LICENSE file or #
+# go to https://github.com/pyccel/pyccel/blob/master/LICENSE for full license details.     #
+#------------------------------------------------------------------------------------------#
+"""
+This submodule contains CUDA methods for Pyccel.
+"""
+
+
+def synchronize():
+    """
+    Synchronize CUDA device execution.
+
+    Does nothing in pure Python; Pyccel prints the call as `cudaDeviceSynchronize()`.
+    """
+
diff --git a/pyccel/decorators.py b/pyccel/decorators.py
index 1f640043db..77717a991f 100644
--- a/pyccel/decorators.py
+++ b/pyccel/decorators.py
@@ -19,6 +19,7 @@
     'sympy',
     'template',
     'types',
+    'kernel'
 )
 
 
@@ -109,3 +110,34 @@ def allow_negative_index(f,*args):
     def identity(f):
         return f
     return identity
+
+def kernel(f):
+    """
+    Decorator for marking a Python function as a kernel.
+
+    This function serves as a decorator to mark a Python function
+    as a kernel function, typically used for GPU computations.
+    This allows the function to be indexed with the number of blocks and threads.
+
+    Parameters
+    ----------
+    f : function
+        The function to which the decorator is applied.
+
+    Returns
+    -------
+    KernelAccessor
+        An object wrapping `f`; indexing it returns `f` itself.
+    """
+    class KernelAccessor:
+        """
+        Wrapper which makes the kernel function indexable.
+
+        Indexing an instance returns the wrapped function; the index is ignored.
+        """
+        def __init__(self, f):
+            self._f = f
+        def __getitem__(self, args):
+            return self._f
+
+    return KernelAccessor(f)
diff --git a/pyccel/errors/messages.py b/pyccel/errors/messages.py
index 79eccc1df2..09966d810c 100644
--- a/pyccel/errors/messages.py
+++ b/pyccel/errors/messages.py
@@ -162,3 +162,11 @@
 WRONG_LINSPACE_ENDPOINT = 'endpoint argument must be boolean'
 NON_LITERAL_KEEP_DIMS = 'keep_dims argument must be a literal, otherwise rank is unknown'
 NON_LITERAL_AXIS = 'axis argument must be a literal, otherwise pyccel cannot determine which dimension to operate on'
+MISSING_KERNEL_CONFIGURATION = 'Kernel launch configuration not specified'
+INVALID_KERNEL_LAUNCH_CONFIG = 'Expected exactly 2 parameters for kernel launch'
+INVALID_KERNEL_CALL_BP_GRID = 'Invalid Block per grid parameter for Kernel call'
+INVALID_KERNEL_CALL_TP_BLOCK = 'Invalid Thread per Block parameter for Kernel call'
+
+
+
+
diff --git a/pyccel/parser/semantic.py b/pyccel/parser/semantic.py
index c7ce8d9d7e..9519bc8a63 100644
--- a/pyccel/parser/semantic.py
+++ b/pyccel/parser/semantic.py
@@ -116,6 +116,8 @@
 from pyccel.ast.variable import IndexedElement, AnnotatedPyccelSymbol
 from pyccel.ast.variable import DottedName, DottedVariable
 
+from pyccel.ast.cuda import     KernelCall
+
 from pyccel.errors.errors import Errors
 from pyccel.errors.errors import PyccelSemanticError
 
@@ -133,7 +135,9 @@
         PYCCEL_RESTRICTION_LIST_COMPREHENSION_LIMITS, PYCCEL_RESTRICTION_LIST_COMPREHENSION_SIZE,
         UNUSED_DECORATORS, UNSUPPORTED_POINTER_RETURN_VALUE, PYCCEL_RESTRICTION_OPTIONAL_NONE,
         PYCCEL_RESTRICTION_PRIMITIVE_IMMUTABLE, PYCCEL_RESTRICTION_IS_ISNOT,
-        FOUND_DUPLICATED_IMPORT, UNDEFINED_WITH_ACCESS, MACRO_MISSING_HEADER_OR_FUNC)
+        FOUND_DUPLICATED_IMPORT, UNDEFINED_WITH_ACCESS, MACRO_MISSING_HEADER_OR_FUNC, PYCCEL_RESTRICTION_INHOMOG_SET,
+        MISSING_KERNEL_CONFIGURATION,
+        INVALID_KERNEL_LAUNCH_CONFIG, INVALID_KERNEL_CALL_BP_GRID, INVALID_KERNEL_CALL_TP_BLOCK)
 
 from pyccel.parser.base      import BasicParser
 from pyccel.parser.syntactic import SyntaxParser
@@ -1139,6 +1143,67 @@ def _handle_function(self, expr, func, args, *, is_method = False, use_build_fun
 
             return new_expr
 
+    def _handle_kernel(self, expr, func, args):
+        """
+        Create the node representing the kernel function call.
+
+        Check the launch configuration, the absence of a return value and the
+        number of arguments, reporting a fatal error if any check fails.
+
+        Parameters
+        ----------
+        expr : IndexedFunctionCall
+               The call node; `expr.indexes` holds the launch configuration.
+
+        func : FunctionDef | Interface | PyccelInternalFunction type
+               The function being called.
+
+        args : iterable of FunctionCallArgument
+               The arguments passed to the function.
+
+        Returns
+        -------
+        pyccel.ast.cuda.KernelCall
+            The semantic representation of the kernel call.
+        """
+        if len(expr.indexes) != 2:  # expects [blocks per grid, threads per block]
+            errors.report(INVALID_KERNEL_LAUNCH_CONFIG,
+                    symbol=expr,
+                    severity='fatal')
+        if len(func.results):  # a kernel must not return a value
+            errors.report(f"cuda kernel function '{func.name}' returned a value in violation of the laid-down specification",
+                         symbol=expr,
+                         severity='fatal')
+        if isinstance(func, FunctionDef) and len(args) != len(func.arguments):
+            errors.report(f"{len(args)} argument types given, but function takes {len(func.arguments)} arguments",
+                symbol=expr,
+                severity='fatal')
+        if not isinstance(expr.indexes[0], (LiteralInteger)):  # else: must name a native-int variable
+            if isinstance(expr.indexes[0], PyccelSymbol):
+                num_blocks = self.get_variable(expr.indexes[0])
+
+                if not isinstance(num_blocks.dtype, PythonNativeInt):
+                    errors.report(INVALID_KERNEL_CALL_BP_GRID,
+                    symbol = expr,
+                    severity='fatal')
+            else:
+                errors.report(INVALID_KERNEL_CALL_BP_GRID,
+                    symbol = expr,
+                    severity='fatal')
+        if not isinstance(expr.indexes[1], (LiteralInteger)):  # same rule for threads per block
+            if isinstance(expr.indexes[1], PyccelSymbol):
+                tp_block = self.get_variable(expr.indexes[1])
+                if not isinstance(tp_block.dtype, PythonNativeInt):
+                    errors.report(INVALID_KERNEL_CALL_TP_BLOCK,
+                    symbol = expr,
+                    severity='fatal')
+            else:
+                errors.report(INVALID_KERNEL_CALL_TP_BLOCK,
+                    symbol = expr,
+                    severity='fatal')
+        new_expr = KernelCall(func, args, expr.indexes[0], expr.indexes[1])
+        return new_expr
+
     def _sort_function_call_args(self, func_args, args):
         """
         Sort and add the missing call arguments to match the arguments in the function definition.
@@ -2815,6 +2880,23 @@ def _visit_Lambda(self, expr):
                 expr = Lambda(tuple(expr.variables), expr_new)
         return expr
 
+    def _visit_IndexedFunctionCall(self, expr):
+        name     = expr.funcdef
+        name = self.scope.get_expected_name(name)
+        func     = self.scope.find(name, 'functions')
+        args = self._handle_function_args(expr.args)
+
+        if func is None:
+            return errors.report(UNDEFINED_FUNCTION, symbol=expr.funcdef,
+                    bounding_box=(self.current_ast_node.lineno, self.current_ast_node.col_offset),
+                    severity='fatal')
+
+        func = self._annotate_the_called_function_def(func)
+        if 'kernel' in func.decorators :
+            return self._handle_kernel(expr, func, args)
+        else:
+            return errors.report("Unknown function type",
+                symbol=expr, severity='fatal')
     def _visit_FunctionCall(self, expr):
         name     = expr.funcdef
         try:
diff --git a/pyccel/parser/syntactic.py b/pyccel/parser/syntactic.py
index 2967f4999b..3af7f0728a 100644
--- a/pyccel/parser/syntactic.py
+++ b/pyccel/parser/syntactic.py
@@ -64,6 +64,8 @@
 
 from pyccel.ast.type_annotations import SyntacticTypeAnnotation, UnionTypeAnnotation
 
+from pyccel.ast.core import IndexedFunctionCall
+
 from pyccel.parser.base        import BasicParser
 from pyccel.parser.extend_tree import extend_tree
 from pyccel.parser.utilities   import get_default_path
@@ -1102,6 +1104,8 @@ def _visit_Call(self, stmt):
         elif isinstance(func, DottedName):
             func_attr = FunctionCall(func.name[-1], args)
             func = DottedName(*func.name[:-1], func_attr)
+        elif isinstance(func,IndexedElement):
+            func = IndexedFunctionCall(func.base, args, func.indices)
         else:
             raise NotImplementedError(f' Unknown function type {type(func)}')
 
diff --git a/tests/conftest.py b/tests/conftest.py
index a5082ef6e8..4e74d1ec7a 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -59,6 +59,15 @@ def pytest_runtest_teardown(item, nextitem):
 
 def pytest_addoption(parser):
     parser.addoption("--developer-mode", action="store_true", default=github_debugging, help="Show tracebacks when pyccel errors are raised")
+    parser.addoption("--gpu_available", action="store_true",
+                default=False, help="enable GPU tests")
+
+def pytest_generate_tests(metafunc):
+    if "gpu_available" in metafunc.fixturenames:
+        if metafunc.config.getoption("gpu_available"):
+            metafunc.parametrize("gpu_available", [True])
+        else:
+            metafunc.parametrize("gpu_available", [False])
 
 def pytest_sessionstart(session):
     # setup_stuff
diff --git a/tests/cuda/test_kernel_semantic.py b/tests/cuda/test_kernel_semantic.py
new file mode 100644
index 0000000000..00b74c3bea
--- /dev/null
+++ b/tests/cuda/test_kernel_semantic.py
@@ -0,0 +1,176 @@
+# pylint: disable=missing-function-docstring, missing-module-docstring
+import pytest
+
+from pyccel import epyccel
+from pyccel.decorators import kernel
+from pyccel.errors.errors import Errors, PyccelSemanticError
+from pyccel.errors.messages import (INVALID_KERNEL_CALL_TP_BLOCK,
+                                    INVALID_KERNEL_CALL_BP_GRID,
+                                    INVALID_KERNEL_LAUNCH_CONFIG)
+
+
+@pytest.mark.cuda
+def test_invalid_block_number():
+    def invalid_block_number():
+        @kernel
+        def kernel_call():
+            pass
+
+        blocks_per_grid = 1.0
+        threads_per_block = 1
+        kernel_call[blocks_per_grid, threads_per_block]()
+
+    errors = Errors()
+
+    with pytest.raises(PyccelSemanticError):
+        epyccel(invalid_block_number, language="cuda")
+
+    assert errors.has_errors()
+
+    assert errors.num_messages() == 1
+
+    error_info = [*errors.error_info_map.values()][0][0]
+    assert error_info.symbol.funcdef == 'kernel_call'
+    assert INVALID_KERNEL_CALL_BP_GRID == error_info.message
+
+
+@pytest.mark.cuda
+def test_invalid_thread_per_block():
+    def invalid_thread_per_block():
+        @kernel
+        def kernel_call():
+            pass
+
+        blocks_per_grid = 1
+        threads_per_block = 1.0
+        kernel_call[blocks_per_grid, threads_per_block]()
+
+    errors = Errors()
+
+    with pytest.raises(PyccelSemanticError):
+        epyccel(invalid_thread_per_block, language="cuda")
+    assert errors.has_errors()
+    assert errors.num_messages() == 1
+    error_info = [*errors.error_info_map.values()][0][0]
+    assert error_info.symbol.funcdef == 'kernel_call'
+    assert INVALID_KERNEL_CALL_TP_BLOCK == error_info.message
+
+
+@pytest.mark.cuda
+def test_invalid_launch_config_high():
+    def invalid_launch_config_high():
+        @kernel
+        def kernel_call():
+            pass
+
+        blocks_per_grid = 1
+        threads_per_block = 1
+        third_param = 1
+        kernel_call[blocks_per_grid, threads_per_block, third_param]()
+
+    errors = Errors()
+
+    with pytest.raises(PyccelSemanticError):
+        epyccel(invalid_launch_config_high, language="cuda")
+
+    assert errors.has_errors()
+    assert errors.num_messages() == 1
+
+    error_info = [*errors.error_info_map.values()][0][0]
+    assert error_info.symbol.funcdef == 'kernel_call'
+    assert INVALID_KERNEL_LAUNCH_CONFIG == error_info.message
+
+
+@pytest.mark.cuda
+def test_invalid_launch_config_low():
+    def invalid_launch_config_low():
+        @kernel
+        def kernel_call():
+            pass
+
+        blocks_per_grid = 1
+        kernel_call[blocks_per_grid]()
+
+    errors = Errors()
+
+    with pytest.raises(PyccelSemanticError):
+        epyccel(invalid_launch_config_low, language="cuda")
+
+    assert errors.has_errors()
+    assert errors.num_messages() == 1
+
+    error_info = [*errors.error_info_map.values()][0][0]
+    assert error_info.symbol.funcdef == 'kernel_call'
+    assert INVALID_KERNEL_LAUNCH_CONFIG == error_info.message
+
+
+@pytest.mark.cuda
+def test_invalid_arguments_for_kernel_call():
+    def invalid_arguments():
+        @kernel
+        def kernel_call(arg : int):
+            pass
+
+        blocks_per_grid = 1
+        threads_per_block = 1
+        kernel_call[blocks_per_grid, threads_per_block]()
+
+    errors = Errors()
+
+    with pytest.raises(PyccelSemanticError):
+        epyccel(invalid_arguments, language="cuda")
+
+    assert errors.has_errors()
+    assert errors.num_messages() == 1
+
+    error_info = [*errors.error_info_map.values()][0][0]
+    assert error_info.symbol.funcdef == 'kernel_call'
+    assert "0 argument types given, but function takes 1 arguments" == error_info.message
+
+
+@pytest.mark.cuda
+def test_invalid_arguments_for_kernel_call_2():
+    def invalid_arguments_():
+        @kernel
+        def kernel_call():
+            pass
+
+        blocks_per_grid = 1
+        threads_per_block = 1
+        kernel_call[blocks_per_grid, threads_per_block](1)
+
+    errors = Errors()
+
+    with pytest.raises(PyccelSemanticError):
+        epyccel(invalid_arguments_, language="cuda")
+
+    assert errors.has_errors()
+    assert errors.num_messages() == 1
+
+    error_info = [*errors.error_info_map.values()][0][0]
+    assert error_info.symbol.funcdef == 'kernel_call'
+    assert "1 argument types given, but function takes 0 arguments" == error_info.message
+
+
+@pytest.mark.cuda
+def test_kernel_return():
+    def kernel_return():
+        @kernel
+        def kernel_call():
+            return 7
+
+        blocks_per_grid = 1
+        threads_per_block = 1
+        kernel_call[blocks_per_grid, threads_per_block](1)
+
+    errors = Errors()
+
+    with pytest.raises(PyccelSemanticError):
+        epyccel(kernel_return, language="cuda")
+
+    assert errors.has_errors()
+    assert errors.num_messages() == 1
+
+    error_info = [*errors.error_info_map.values()][0][0]
+    assert error_info.symbol.funcdef == 'kernel_call'
+    assert "cuda kernel function 'kernel_call' returned a value in violation of the laid-down specification" == error_info.message
diff --git a/tests/pyccel/scripts/kernel/hello_kernel.py b/tests/pyccel/scripts/kernel/hello_kernel.py
new file mode 100644
index 0000000000..b6901b25a1
--- /dev/null
+++ b/tests/pyccel/scripts/kernel/hello_kernel.py
@@ -0,0 +1,19 @@
+# pylint: disable=missing-function-docstring, missing-module-docstring
+from pyccel.decorators import kernel
+from pyccel import cuda
+
+@kernel
+def say_hello(its_morning : bool):
+    if(its_morning):
+        print("Hello and Good morning")
+    else:
+        print("Hello and Good afternoon")
+
+def f():
+    its_morning = True
+    say_hello[1,1](its_morning)
+    cuda.synchronize()
+
+if __name__ == '__main__':
+    f()
+
diff --git a/tests/pyccel/scripts/kernel/kernel_name_collision.py b/tests/pyccel/scripts/kernel/kernel_name_collision.py
new file mode 100644
index 0000000000..ac7abe25ae
--- /dev/null
+++ b/tests/pyccel/scripts/kernel/kernel_name_collision.py
@@ -0,0 +1,8 @@
+# pylint: disable=missing-function-docstring, missing-module-docstring
+from pyccel.decorators import kernel
+
+@kernel
+def do():
+    pass
+
+do[1,1]()
diff --git a/tests/pyccel/test_pyccel.py b/tests/pyccel/test_pyccel.py
index ec1e846549..b4757a3c31 100644
--- a/tests/pyccel/test_pyccel.py
+++ b/tests/pyccel/test_pyccel.py
@@ -294,7 +294,7 @@ def compare_pyth_fort_output( p_output, f_output, dtype=float, language=None):
 #------------------------------------------------------------------------------
 def pyccel_test(test_file, dependencies = None, compile_with_pyccel = True,
         cwd = None, pyccel_commands = "", output_dtype = float,
-        language = None, output_dir = None):
+        language = None, output_dir = None, execute_code = True):
     """
     Run pyccel and compare the output to ensure that the results
     are equivalent
@@ -394,13 +394,14 @@ def pyccel_test(test_file, dependencies = None, compile_with_pyccel = True,
             compile_fortran(cwd, output_test_file, dependencies)
         elif language == 'c':
             compile_c(cwd, output_test_file, dependencies)
-
-    lang_out = get_lang_output(output_test_file, language)
-    compare_pyth_fort_output(pyth_out, lang_out, output_dtype, language)
+    if  execute_code:
+        lang_out = get_lang_output(output_test_file, language)
+        compare_pyth_fort_output(pyth_out, lang_out, output_dtype, language)
 
 #==============================================================================
 # UNIT TESTS
 #==============================================================================
+
 def test_relative_imports_in_project(language):
 
     base_dir = os.path.dirname(os.path.realpath(__file__))
@@ -728,6 +729,19 @@ def test_multiple_results(language):
 def test_elemental(language):
     pyccel_test("scripts/decorators_elemental.py", language = language)
 
+#------------------------------------------------------------------------------
+@pytest.mark.cuda
+def test_hello_kernel(gpu_available):
+    types = str
+    pyccel_test("scripts/kernel/hello_kernel.py",
+            language="cuda", output_dtype=types , execute_code=gpu_available)
+
+#------------------------------------------------------------------------------
+@pytest.mark.cuda
+def test_kernel_collision(gpu_available):
+    pyccel_test("scripts/kernel/kernel_name_collision.py",
+            language="cuda", execute_code=gpu_available)
+
 #------------------------------------------------------------------------------
 def test_print_strings(language):
     types = str

From 31aae9d43845e6a3ea037d2b53428681af13946e Mon Sep 17 00:00:00 2001
From: Said Mazouz <95222894+smazouz42@users.noreply.github.com>
Date: Wed, 3 Jul 2024 17:37:02 +0100
Subject: [PATCH 066/130] Updated CUDA Name Clash Checker By Adding
 CUDA-specific keywords (#60)

This pull request addresses issue #59 by adding more CUDA-specific
keywords to enhance the checking of variable/function names and prevent
name clashes.

---------

Co-authored-by: EmilyBourne <louise.bourne@gmail.com>
Co-authored-by: bauom <40796259+bauom@users.noreply.github.com>
---
 CHANGELOG.md                              |  1 +
 pyccel/naming/cudanameclashchecker.py     | 36 ++++++++++++++++++++++-
 pyccel/naming/languagenameclashchecker.py |  5 ++++
 3 files changed, 41 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 10bec59084..0539783922 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,7 @@ All notable changes to this project will be documented in this file.
 
 -   #32 : Add support for `nvcc` Compiler and `cuda` language as a possible option.
 -   #48 : Fix incorrect handling of imports in `cuda`.
+-   #59 : Updated `cuda` clash checker.
 -   #42 : Add support for custom kernel in`cuda`.
 -   #42 : Add Cuda module to Pyccel. Add support for `cuda.synchronize` function.
 
diff --git a/pyccel/naming/cudanameclashchecker.py b/pyccel/naming/cudanameclashchecker.py
index 971204e912..c7aaa4952f 100644
--- a/pyccel/naming/cudanameclashchecker.py
+++ b/pyccel/naming/cudanameclashchecker.py
@@ -16,6 +16,7 @@ class CudaNameClashChecker(LanguageNameClashChecker):
     verify that they do not cause name clashes. Name clashes may be due to
     new variables, or due to the use of reserved keywords.
     """
+
     # Keywords as mentioned on https://en.cppreference.com/w/c/keyword
     keywords = set(['isign', 'fsign', 'csign', 'auto', 'break', 'case', 'char', 'const',
         'continue', 'default', 'do', 'double', 'else', 'enum',
@@ -37,7 +38,40 @@ class CudaNameClashChecker(LanguageNameClashChecker):
         'GET_INDEX_FUNC_H2', 'GET_INDEX_FUNC', 'GET_INDEX',
         'INDEX', 'GET_ELEMENT', 'free_array', 'free_pointer',
         'get_index', 'numpy_to_ndarray_strides',
-        'numpy_to_ndarray_shape', 'get_size', 'order_f', 'order_c', 'array_copy_data'])
+        'numpy_to_ndarray_shape', 'get_size', 'order_f', 'order_c', 'array_copy_data',
+        '__global__', '__device__', '__host__','__constant__', '__shared__',
+        '__managed__','threadIdx', 'blockIdx', 'blockDim', 'gridDim',
+        'warpSize', 'cudaMalloc', 'cudaFree', 'cudaMemcpy', 'cudaMemset',
+        'cudaMallocHost', 'cudaFreeHost', 'cudaMallocPitch',
+        'cudaMallocArray', 'cudaFreeArray', 'cudaHostAlloc',
+        'cudaHostRegister', 'cudaHostUnregister', 'cudaHostGetDevicePointer',
+        'cudaHostGetFlags', 'cudaDeviceSynchronize', 'cudaDeviceReset',
+        'cudaSetDevice', 'cudaGetDeviceCount', 'cudaGetDeviceProperties',
+        'cudaChooseDevice', 'cudaSetDeviceFlags', 'cudaGetDevice',
+        'cudaStreamCreate', 'cudaStreamDestroy', 'cudaStreamSynchronize',
+        'cudaStreamWaitEvent', 'cudaEventCreate', 'cudaEventDestroy', 'cudaEventRecord',
+        'cudaEventSynchronize', 'cudaEventElapsedTime', 'cuInit', 'cuDeviceGet',
+        'cuDeviceGetCount', 'cuDeviceGetName',
+        'cuDeviceComputeCapability', 'cuCtxCreate', 'cuCtxDestroy',
+        'cuCtxSynchronize', 'cuModuleLoad', 'cuModuleUnload',
+        'cuModuleGetFunction', 'cuModuleGetGlobal', 'cuModuleGetTexRef',
+        'cuMemAlloc', 'cuMemFree', 'cuMemcpyHtoD', 'cuMemcpyDtoH',
+        'cuMemcpyDtoD', 'cuMemcpyHtoDAsync', 'cuMemcpyDtoHAsync',
+        'cuMemcpyDtoDAsync', 'cuMemsetD8', 'cuMemsetD16', 'cuMemsetD32',
+        'cuMemsetD2D8', 'cuMemsetD2D16', 'cuMemsetD2D32', 'cuParamSetSize',
+        'cuParamSeti', 'cuParamSetf', 'cuParamSetv', 'cuLaunch', 'cuLaunchGrid',
+        'cuLaunchGridAsync', 'cuEventCreate', 'cuEventRecord', 'cuEventQuery',
+        'cuEventSynchronize', 'cuEventDestroy', 'cuEventElapsedTime',
+        'cuStreamCreate', 'cuStreamQuery', 'cuStreamSynchronize',
+        'cuStreamDestroy', 'cuFuncSetBlockShape', 'cuFuncSetSharedSize',
+        'cuFuncGetAttribute', 'cuTexRefCreate', 'cuTexRefDestroy',
+        'cuTexRefSetArray', 'cuTexRefSetAddress', 'cuTexRefSetAddress2D',
+        'cuTexRefSetFormat', 'cuTexRefSetAddressMode', 'cuTexRefSetFilterMode',
+        'cuTexRefSetFlags', 'cuTexRefGetAddress', 'cuTexRefGetArray',
+        'cuTexRefGetAddressMode', 'cuTexRefGetFilterMode', 'cuTexRefGetFormat',
+        'cuTexRefGetFlags', 'cuLaunchKernel', 'cuOccupancyMaxActiveBlocksPerMultiprocessor',
+        'cuOccupancyMaxPotentialBlockSize', 'cuOccupancyMaxPotentialBlockSizeWithFlags'
+    ])
 
     def has_clash(self, name, symbols):
         """
diff --git a/pyccel/naming/languagenameclashchecker.py b/pyccel/naming/languagenameclashchecker.py
index fa672a905b..d6415e6449 100644
--- a/pyccel/naming/languagenameclashchecker.py
+++ b/pyccel/naming/languagenameclashchecker.py
@@ -19,6 +19,11 @@ class LanguageNameClashChecker(metaclass = Singleton):
     """
     keywords = None
 
+    def __init__(self): #pylint: disable=useless-parent-delegation
+        # This __init__ function is required so the ArgumentSingleton can
+        # always detect a signature
+        super().__init__()
+
     def _get_collisionless_name(self, name, symbols):
         """
         Get a name which doesn't collision with keywords or symbols.

From 21c93b5bfb93ea9af843436ec4c17fc0b17898e7 Mon Sep 17 00:00:00 2001
From: Said Mazouz <95222894+smazouz42@users.noreply.github.com>
Date: Wed, 3 Jul 2024 18:04:22 +0100
Subject: [PATCH 067/130] add handle for custom device (#61)

This pull request addresses issue
https://github.com/pyccel/pyccel-cuda/issues/41 by implementing a new
feature in Pyccel that allows users to define custom device functions.

**Commit Summary**

- Adding handler for custom device and its code generation.
- Adding test

---------

Co-authored-by: EmilyBourne <louise.bourne@gmail.com>
---
 CHANGELOG.md                               |  1 +
 docs/cuda.md                               | 25 ++++++++++++++++-
 pyccel/codegen/printing/cucode.py          |  7 ++---
 pyccel/decorators.py                       | 19 +++++++++++++
 pyccel/errors/messages.py                  |  2 +-
 pyccel/parser/semantic.py                  |  7 ++++-
 tests/cuda/test_device_semantic.py         | 31 ++++++++++++++++++++++
 tests/pyccel/scripts/kernel/device_test.py | 18 +++++++++++++
 tests/pyccel/test_pyccel.py                |  8 ++++++
 9 files changed, 112 insertions(+), 6 deletions(-)
 create mode 100644 tests/cuda/test_device_semantic.py
 create mode 100644 tests/pyccel/scripts/kernel/device_test.py

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 0539783922..1b9fa3e635 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -10,6 +10,7 @@ All notable changes to this project will be documented in this file.
 -   #59 : Updated `cuda` clash checker.
 -   #42 : Add support for custom kernel in`cuda`.
 -   #42 : Add Cuda module to Pyccel. Add support for `cuda.synchronize` function.
+-   #41 : Add support for custom device in`cuda`.
 
 ## \[UNRELEASED\]
 
diff --git a/docs/cuda.md b/docs/cuda.md
index de30d52b80..7643a4ac02 100644
--- a/docs/cuda.md
+++ b/docs/cuda.md
@@ -20,4 +20,27 @@ threadsperblock = 1
 # Call your kernel function
 my_kernel[blockspergrid, threadsperblock]()
 
-```
\ No newline at end of file
+```
+
+### device
+
+Device functions are similar to kernels, but are executed within the context of a kernel. They can be called only from kernels or device functions, and are typically used for operations that are too small to justify launching a separate kernel, or for operations that need to be performed repeatedly within the context of a kernel.
+
+```python
+from pyccel.decorators import device, kernel
+
+@device
+def add(x, y):
+    return x + y
+
+@kernel
+def my_kernel():
+    x = 1
+    y = 2
+    z = add(x, y)
+    print(z)
+
+my_kernel[1, 1]()
+
+```
+
diff --git a/pyccel/codegen/printing/cucode.py b/pyccel/codegen/printing/cucode.py
index cd26843017..7c01d93c47 100644
--- a/pyccel/codegen/printing/cucode.py
+++ b/pyccel/codegen/printing/cucode.py
@@ -86,9 +86,10 @@ def function_signature(self, expr, print_arg_names = True):
         str
             Signature of the function.
         """
-        cuda_decorater = '__global__' if 'kernel' in expr.decorators else ''
+        cuda_decorator = '__global__' if 'kernel' in expr.decorators else \
+        '__device__' if 'device' in expr.decorators else ''
         c_function_signature = super().function_signature(expr, print_arg_names)
-        return f'{cuda_decorater} {c_function_signature}'
+        return f'{cuda_decorator} {c_function_signature}'
 
     def _print_KernelCall(self, expr):
         func = expr.funcdef
@@ -109,7 +110,7 @@ def _print_ModuleHeader(self, expr):
         cuda_headers = ""
         for f in expr.module.funcs:
             if not f.is_inline:
-                if 'kernel' in f.decorators:  # Checking for 'kernel' decorator
+                if 'kernel' in f.decorators or 'device' in f.decorators:
                     cuda_headers += self.function_signature(f) + ';\n'
                 else:
                     funcs += self.function_signature(f) + ';\n'
diff --git a/pyccel/decorators.py b/pyccel/decorators.py
index 77717a991f..ff413fe443 100644
--- a/pyccel/decorators.py
+++ b/pyccel/decorators.py
@@ -11,6 +11,7 @@
 __all__ = (
     'allow_negative_index',
     'bypass',
+    'device',
     'elemental',
     'inline',
     'private',
@@ -141,3 +142,21 @@ def __getitem__(self, args):
             return self._f
 
     return KernelAccessor(f)
+
+def device(f):
+    """
+    Decorator for marking a function as a GPU device function.
+
+    Returns the function unchanged in pure Python; Pyccel's CUDA printer emits it as `__device__`.
+
+    Parameters
+    ----------
+    f : function
+        The function to be marked as a device function.
+
+    Returns
+    -------
+    function
+        The same function `f`, returned unmodified.
+    """
+    return f
diff --git a/pyccel/errors/messages.py b/pyccel/errors/messages.py
index 09966d810c..5fe622c29b 100644
--- a/pyccel/errors/messages.py
+++ b/pyccel/errors/messages.py
@@ -166,7 +166,7 @@
 INVALID_KERNEL_LAUNCH_CONFIG = 'Expected exactly 2 parameters for kernel launch'
 INVALID_KERNEL_CALL_BP_GRID = 'Invalid Block per grid parameter for Kernel call'
 INVALID_KERNEL_CALL_TP_BLOCK = 'Invalid Thread per Block parameter for Kernel call'
-
+INVAlID_DEVICE_CALL = 'A function decorated with "device" should be called only from a "kernel" or another "device" function.'
 
 
 
diff --git a/pyccel/parser/semantic.py b/pyccel/parser/semantic.py
index 9519bc8a63..b3a7ecd6b1 100644
--- a/pyccel/parser/semantic.py
+++ b/pyccel/parser/semantic.py
@@ -136,9 +136,10 @@
         UNUSED_DECORATORS, UNSUPPORTED_POINTER_RETURN_VALUE, PYCCEL_RESTRICTION_OPTIONAL_NONE,
         PYCCEL_RESTRICTION_PRIMITIVE_IMMUTABLE, PYCCEL_RESTRICTION_IS_ISNOT,
         FOUND_DUPLICATED_IMPORT, UNDEFINED_WITH_ACCESS, MACRO_MISSING_HEADER_OR_FUNC, PYCCEL_RESTRICTION_INHOMOG_SET,
-        MISSING_KERNEL_CONFIGURATION,
+        MISSING_KERNEL_CONFIGURATION, INVAlID_DEVICE_CALL,
         INVALID_KERNEL_LAUNCH_CONFIG, INVALID_KERNEL_CALL_BP_GRID, INVALID_KERNEL_CALL_TP_BLOCK)
 
+
 from pyccel.parser.base      import BasicParser
 from pyccel.parser.syntactic import SyntaxParser
 
@@ -1061,6 +1062,10 @@ def _handle_function(self, expr, func, args, *, is_method = False, use_build_fun
         FunctionCall/PyccelFunction
             The semantic representation of the call.
         """
+
+        if isinstance(func, FunctionDef) and 'device' in func.decorators:
+            if 'kernel' not in self.scope.decorators and 'device' not in self.scope.decorators:
+                errors.report(INVAlID_DEVICE_CALL,symbol=expr, severity='fatal')
         if isinstance(func, PyccelFunctionDef):
             if use_build_functions:
                 annotation_method = '_build_' + func.cls_name.__name__
diff --git a/tests/cuda/test_device_semantic.py b/tests/cuda/test_device_semantic.py
new file mode 100644
index 0000000000..5723991961
--- /dev/null
+++ b/tests/cuda/test_device_semantic.py
@@ -0,0 +1,31 @@
+# pylint: disable=missing-function-docstring, missing-module-docstring
+import pytest
+
+from pyccel import epyccel
+from pyccel.decorators import device
+from pyccel.errors.errors import Errors, PyccelSemanticError
+from pyccel.errors.messages import (INVAlID_DEVICE_CALL,)
+
+
+@pytest.mark.cuda
+def test_invalid_device_call():
+    def invalid_device_call():
+        @device
+        def device_call():
+            pass
+        def fake_kernel_call():
+            device_call()
+
+        fake_kernel_call()
+
+    errors = Errors()
+
+    with pytest.raises(PyccelSemanticError):
+        epyccel(invalid_device_call, language="cuda")
+
+    assert errors.has_errors()
+
+    assert errors.num_messages() == 1
+
+    error_info = [*errors.error_info_map.values()][0][0]
+    assert INVAlID_DEVICE_CALL == error_info.message
diff --git a/tests/pyccel/scripts/kernel/device_test.py b/tests/pyccel/scripts/kernel/device_test.py
new file mode 100644
index 0000000000..a4762a6242
--- /dev/null
+++ b/tests/pyccel/scripts/kernel/device_test.py
@@ -0,0 +1,18 @@
+# pylint: disable=missing-function-docstring, missing-module-docstring
+from pyccel.decorators import device, kernel
+from pyccel import cuda
+
+@device
+def device_call():
+    print("Hello from device")
+
+@kernel
+def kernel_call():
+    device_call()
+
+def f():
+    kernel_call[1,1]()
+    cuda.synchronize()
+
+if __name__ == '__main__':
+    f()
diff --git a/tests/pyccel/test_pyccel.py b/tests/pyccel/test_pyccel.py
index b4757a3c31..2d55c6e1cb 100644
--- a/tests/pyccel/test_pyccel.py
+++ b/tests/pyccel/test_pyccel.py
@@ -742,6 +742,14 @@ def test_kernel_collision(gpu_available):
     pyccel_test("scripts/kernel/kernel_name_collision.py",
             language="cuda", execute_code=gpu_available)
 
+#------------------------------------------------------------------------------
+
+@pytest.mark.cuda
+def test_device_call(gpu_available):
+    types = str
+    pyccel_test("scripts/kernel/device_test.py",
+            language="cuda", output_dtype=types, execute_code=gpu_available)
+
 #------------------------------------------------------------------------------
 def test_print_strings(language):
     types = str

From d66442d4a09c479d92b17c8a262634d3f4995888 Mon Sep 17 00:00:00 2001
From: smazouz42 <smazouz@student.1337.ma>
Date: Thu, 18 Jul 2024 10:41:06 +0100
Subject: [PATCH 068/130] fix a doc problem

---
 pyccel/ast/cudaext.py             | 1 -
 pyccel/codegen/printing/ccode.py  | 2 +-
 pyccel/codegen/printing/cucode.py | 9 +++++----
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/pyccel/ast/cudaext.py b/pyccel/ast/cudaext.py
index 99efd2c4ed..c823ee1181 100644
--- a/pyccel/ast/cudaext.py
+++ b/pyccel/ast/cudaext.py
@@ -55,7 +55,6 @@ def init_dtype(self):
         The dtype provided to the function when it was initialised in Python.
 
         The dtype provided to the function when it was initialised in Python.
-        If no dtype was provided then this should equal `None`.
         """
         return self._init_dtype
 
diff --git a/pyccel/codegen/printing/ccode.py b/pyccel/codegen/printing/ccode.py
index 41517f2420..3ca1833d4a 100644
--- a/pyccel/codegen/printing/ccode.py
+++ b/pyccel/codegen/printing/ccode.py
@@ -1462,7 +1462,7 @@ def _print_IndexedElement(self, expr):
         inds = list(expr.indices)
         base_shape = base.shape
         allow_negative_indexes = expr.allows_negative_indexes
-        
+
         if isinstance(base.class_type, NumpyNDArrayType):
             #set dtype to the C struct types
             dtype = self.find_in_ndarray_type_registry(expr.dtype)
diff --git a/pyccel/codegen/printing/cucode.py b/pyccel/codegen/printing/cucode.py
index 61254cf367..1d6d02e7e4 100644
--- a/pyccel/codegen/printing/cucode.py
+++ b/pyccel/codegen/printing/cucode.py
@@ -199,9 +199,10 @@ def get_declare_type(self, expr):
 
     def _print_Assign(self, expr):
         rhs = expr.rhs
-        if not isinstance(rhs.class_type, CudaArrayType):
-                return super()._print_Assign(expr)
-        if(isinstance(rhs, (CudaFull))):
+        if isinstance(rhs.class_type, CudaArrayType):
+            if(isinstance(rhs, (CudaFull))):
             # TODO add support for CudaFull
-            return " \n"
+                return " \n"
+
+        return super()._print_Assign(expr)
 

From 976e72904fecf9816f1901647c7ec449317be3b0 Mon Sep 17 00:00:00 2001
From: smazouz42 <smazouz@student.1337.ma>
Date: Thu, 18 Jul 2024 11:01:45 +0100
Subject: [PATCH 069/130] fix a doc problem

---
 pyccel/ast/cudaext.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/pyccel/ast/cudaext.py b/pyccel/ast/cudaext.py
index c823ee1181..f890fc7999 100644
--- a/pyccel/ast/cudaext.py
+++ b/pyccel/ast/cudaext.py
@@ -27,7 +27,7 @@
 
 class CudaNewarray(PyccelFunction):
     """
-    superclass for nodes representing Cuda array allocation functions.
+    Superclass for nodes representing Cuda array allocation functions.
 
     Class from which all nodes representing a Cuda function which implies a call
     to `Allocate` should inherit.
@@ -55,6 +55,7 @@ def init_dtype(self):
         The dtype provided to the function when it was initialised in Python.
 
         The dtype provided to the function when it was initialised in Python.
+        If no dtype was provided then this should equal `None`.
         """
         return self._init_dtype
 
@@ -68,7 +69,7 @@ def __init__(self, *args ,class_type, init_dtype, memory_location):
 class CudaFull(CudaNewarray):
     """
     Represents a call to `cuda.full` for code generation.
-    
+
     Represents a call to the Cuda function `full` which creates an array
     of a specified size and shape filled with a specified value.
 

From 0289a778a0e18d43e4898c81b1c45d15c2fca443 Mon Sep 17 00:00:00 2001
From: smazouz42 <smazouz@student.1337.ma>
Date: Thu, 18 Jul 2024 16:41:07 +0100
Subject: [PATCH 070/130] debugging purpose

---
 tests/pyccel/test_pyccel.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tests/pyccel/test_pyccel.py b/tests/pyccel/test_pyccel.py
index 9b0f0d443e..fbf6adc972 100644
--- a/tests/pyccel/test_pyccel.py
+++ b/tests/pyccel/test_pyccel.py
@@ -62,6 +62,8 @@ def compile_pyccel(path_dir, test_file, options = ""):
     cmd = [shutil.which("pyccel"), test_file]
     if options != "":
         cmd += options.strip().split()
+    print(cmd)
+    exit(0)
     p = subprocess.Popen(cmd, universal_newlines=True, cwd=path_dir)
     p.wait()
     assert p.returncode==0

From ca49b1b704aa8642bd445b7550b9c000e5316673 Mon Sep 17 00:00:00 2001
From: smazouz42 <smazouz@student.1337.ma>
Date: Thu, 18 Jul 2024 16:44:57 +0100
Subject: [PATCH 071/130] debugging purpose

---
 tests/pyccel/test_pyccel.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tests/pyccel/test_pyccel.py b/tests/pyccel/test_pyccel.py
index fbf6adc972..cd33c5d17e 100644
--- a/tests/pyccel/test_pyccel.py
+++ b/tests/pyccel/test_pyccel.py
@@ -62,6 +62,8 @@ def compile_pyccel(path_dir, test_file, options = ""):
     cmd = [shutil.which("pyccel"), test_file]
     if options != "":
         cmd += options.strip().split()
+    print(path_dir)
+    print("-------------------------------------------")
     print(cmd)
     exit(0)
     p = subprocess.Popen(cmd, universal_newlines=True, cwd=path_dir)

From ce1ddbcd344c040212fc0e8882b787c2d5250577 Mon Sep 17 00:00:00 2001
From: smazouz42 <smazouz@student.1337.ma>
Date: Thu, 18 Jul 2024 16:51:47 +0100
Subject: [PATCH 072/130] debugging purpose

---
 tests/pyccel/test_pyccel.py | 21 ++++++++++++++-------
 1 file changed, 14 insertions(+), 7 deletions(-)

diff --git a/tests/pyccel/test_pyccel.py b/tests/pyccel/test_pyccel.py
index cd33c5d17e..c7edf5a6e5 100644
--- a/tests/pyccel/test_pyccel.py
+++ b/tests/pyccel/test_pyccel.py
@@ -62,13 +62,20 @@ def compile_pyccel(path_dir, test_file, options = ""):
     cmd = [shutil.which("pyccel"), test_file]
     if options != "":
         cmd += options.strip().split()
-    print(path_dir)
-    print("-------------------------------------------")
-    print(cmd)
-    exit(0)
-    p = subprocess.Popen(cmd, universal_newlines=True, cwd=path_dir)
-    p.wait()
-    assert p.returncode==0
+    try:
+        p = subprocess.Popen(cmd, universal_newlines=True, cwd=path_dir, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+        out, err = p.communicate()
+        if out:
+            print("Output:\n", out)
+        if err:
+            print("Error:\n", err)
+
+        assert p.returncode == 0, f"Command failed with return code {p.returncode}"
+
+    except AssertionError as e:
+        print(e)
+    except Exception as e:
+        print("An error occurred:", e)
 
 #------------------------------------------------------------------------------
 def compile_c(path_dir, test_file, dependencies, is_mod=False):

From 717c4a4fbed7a0f46e73eb86cc764a5c30cd23ba Mon Sep 17 00:00:00 2001
From: smazouz42 <smazouz@student.1337.ma>
Date: Thu, 18 Jul 2024 16:58:06 +0100
Subject: [PATCH 073/130] debugging purpose

---
 tests/pyccel/test_pyccel.py | 17 +++--------------
 1 file changed, 3 insertions(+), 14 deletions(-)

diff --git a/tests/pyccel/test_pyccel.py b/tests/pyccel/test_pyccel.py
index c7edf5a6e5..9b0f0d443e 100644
--- a/tests/pyccel/test_pyccel.py
+++ b/tests/pyccel/test_pyccel.py
@@ -62,20 +62,9 @@ def compile_pyccel(path_dir, test_file, options = ""):
     cmd = [shutil.which("pyccel"), test_file]
     if options != "":
         cmd += options.strip().split()
-    try:
-        p = subprocess.Popen(cmd, universal_newlines=True, cwd=path_dir, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
-        out, err = p.communicate()
-        if out:
-            print("Output:\n", out)
-        if err:
-            print("Error:\n", err)
-
-        assert p.returncode == 0, f"Command failed with return code {p.returncode}"
-
-    except AssertionError as e:
-        print(e)
-    except Exception as e:
-        print("An error occurred:", e)
+    p = subprocess.Popen(cmd, universal_newlines=True, cwd=path_dir)
+    p.wait()
+    assert p.returncode==0
 
 #------------------------------------------------------------------------------
 def compile_c(path_dir, test_file, dependencies, is_mod=False):

From a19c559f6bd04436f95366dbe1b2be5ce5965c1a Mon Sep 17 00:00:00 2001
From: smazouz42 <smazouz@student.1337.ma>
Date: Thu, 18 Jul 2024 17:04:28 +0100
Subject: [PATCH 074/130] debugging purpose

---
 tests/pyccel/test_pyccel.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/tests/pyccel/test_pyccel.py b/tests/pyccel/test_pyccel.py
index 9b0f0d443e..c42d718f3a 100644
--- a/tests/pyccel/test_pyccel.py
+++ b/tests/pyccel/test_pyccel.py
@@ -62,8 +62,13 @@ def compile_pyccel(path_dir, test_file, options = ""):
     cmd = [shutil.which("pyccel"), test_file]
     if options != "":
         cmd += options.strip().split()
-    p = subprocess.Popen(cmd, universal_newlines=True, cwd=path_dir)
+    p = subprocess.Popen(cmd, universal_newlines=True, cwd=path_dir , stdout=subprocess.PIPE, stderr=subprocess.PIPE)
     p.wait()
+    err , msg = p.communicate()
+    if p.returncode != 0:
+        print(err)
+    else:
+        print(msg)
     assert p.returncode==0
 
 #------------------------------------------------------------------------------

From 33842e970201f4b80465b07b9d74df51f15d12e2 Mon Sep 17 00:00:00 2001
From: smazouz42 <smazouz@student.1337.ma>
Date: Thu, 18 Jul 2024 17:14:08 +0100
Subject: [PATCH 075/130] debugging purpose

---
 tests/pyccel/test_pyccel.py | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

diff --git a/tests/pyccel/test_pyccel.py b/tests/pyccel/test_pyccel.py
index c42d718f3a..6001207f67 100644
--- a/tests/pyccel/test_pyccel.py
+++ b/tests/pyccel/test_pyccel.py
@@ -62,14 +62,9 @@ def compile_pyccel(path_dir, test_file, options = ""):
     cmd = [shutil.which("pyccel"), test_file]
     if options != "":
         cmd += options.strip().split()
-    p = subprocess.Popen(cmd, universal_newlines=True, cwd=path_dir , stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+    p = subprocess.Popen(cmd, universal_newlines=True, cwd=path_dir)
     p.wait()
-    err , msg = p.communicate()
-    if p.returncode != 0:
-        print(err)
-    else:
-        print(msg)
-    assert p.returncode==0
+    # assert p.returncode==0
 
 #------------------------------------------------------------------------------
 def compile_c(path_dir, test_file, dependencies, is_mod=False):

From a52f4a22a5f8eaeff0c9e63ffd44e89db06cb1ce Mon Sep 17 00:00:00 2001
From: smazouz42 <smazouz@student.1337.ma>
Date: Thu, 18 Jul 2024 17:23:49 +0100
Subject: [PATCH 076/130] debugging purpose

---
 tests/pyccel/test_pyccel.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/tests/pyccel/test_pyccel.py b/tests/pyccel/test_pyccel.py
index 6001207f67..33b41ae9d2 100644
--- a/tests/pyccel/test_pyccel.py
+++ b/tests/pyccel/test_pyccel.py
@@ -62,8 +62,13 @@ def compile_pyccel(path_dir, test_file, options = ""):
     cmd = [shutil.which("pyccel"), test_file]
     if options != "":
         cmd += options.strip().split()
-    p = subprocess.Popen(cmd, universal_newlines=True, cwd=path_dir)
+    p = subprocess.Popen(cmd, universal_newlines=True, cwd=path_dir, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
     p.wait()
+    stdout, stderr = p.communicate()
+    if p.returncode != 0:
+        print(f"Command failed with return code {p.returncode}")
+        print(f"Standard Output:\n{stdout}")
+        print(f"Standard Error:\n{stderr}")
     # assert p.returncode==0
 
 #------------------------------------------------------------------------------

From 40d7c0d994ae1246e00f0f0e8b6adbf801eae5aa Mon Sep 17 00:00:00 2001
From: smazouz42 <smazouz@student.1337.ma>
Date: Thu, 18 Jul 2024 17:30:45 +0100
Subject: [PATCH 077/130] debugging purpose

---
 tests/pyccel/test_pyccel.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/pyccel/test_pyccel.py b/tests/pyccel/test_pyccel.py
index 33b41ae9d2..404ca2adb9 100644
--- a/tests/pyccel/test_pyccel.py
+++ b/tests/pyccel/test_pyccel.py
@@ -69,7 +69,7 @@ def compile_pyccel(path_dir, test_file, options = ""):
         print(f"Command failed with return code {p.returncode}")
         print(f"Standard Output:\n{stdout}")
         print(f"Standard Error:\n{stderr}")
-    # assert p.returncode==0
+    assert p.returncode==0
 
 #------------------------------------------------------------------------------
 def compile_c(path_dir, test_file, dependencies, is_mod=False):

From 532cf03b50edd0fcde76d1fffabd4f0a0d016956 Mon Sep 17 00:00:00 2001
From: smazouz42 <smazouz@student.1337.ma>
Date: Thu, 18 Jul 2024 17:44:23 +0100
Subject: [PATCH 078/130] debugging purpose

---
 tests/pyccel/test_pyccel.py | 22 ++++++++++++++++------
 1 file changed, 16 insertions(+), 6 deletions(-)

diff --git a/tests/pyccel/test_pyccel.py b/tests/pyccel/test_pyccel.py
index 404ca2adb9..6e6f1ba519 100644
--- a/tests/pyccel/test_pyccel.py
+++ b/tests/pyccel/test_pyccel.py
@@ -56,19 +56,29 @@ def get_python_output(abs_path, cwd = None):
     return out
 
 #------------------------------------------------------------------------------
+def cat_file(file_path):
+    try:
+        with open(file_path, 'r') as file:
+            content = file.read()
+            print(content)
+    except FileNotFoundError:
+        print(f"Error: The file {file_path} does not exist.")
+    except Exception as e:
+        print(f"An error occurred: {e}")
+
+# Path to the file you want to display
+
 def compile_pyccel(path_dir, test_file, options = ""):
     if "python" in options and "--output" not in options:
         options += " --output=__pyccel__"
     cmd = [shutil.which("pyccel"), test_file]
     if options != "":
         cmd += options.strip().split()
-    p = subprocess.Popen(cmd, universal_newlines=True, cwd=path_dir, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+    file_path = "/__w/pyccel-cuda/pyccel-cuda/tests/pyccel/scripts/kernel/__pyccel__/cuda_ndarrays/cuda_ndarrays.cu"
+    cat_file(file_path)
+    exit(0)
+    p = subprocess.Popen(cmd, universal_newlines=True, cwd=path_dir)
     p.wait()
-    stdout, stderr = p.communicate()
-    if p.returncode != 0:
-        print(f"Command failed with return code {p.returncode}")
-        print(f"Standard Output:\n{stdout}")
-        print(f"Standard Error:\n{stderr}")
     assert p.returncode==0
 
 #------------------------------------------------------------------------------

From 3424b83616964c9d400392ab161f2b634f53cb40 Mon Sep 17 00:00:00 2001
From: smazouz42 <smazouz@student.1337.ma>
Date: Thu, 18 Jul 2024 17:55:22 +0100
Subject: [PATCH 079/130] debugging purpose

---
 .../pyccel/scripts/hope_benchmarks/test.json  | 67 +++++++++++++++++++
 tests/pyccel/scripts/kernel/host_array.py     | 12 ++--
 tests/pyccel/scripts/test2.json               | 67 +++++++++++++++++++
 tests/pyccel/test_pyccel.py                   | 15 -----
 4 files changed, 140 insertions(+), 21 deletions(-)
 create mode 100644 tests/pyccel/scripts/hope_benchmarks/test.json
 create mode 100644 tests/pyccel/scripts/test2.json

diff --git a/tests/pyccel/scripts/hope_benchmarks/test.json b/tests/pyccel/scripts/hope_benchmarks/test.json
new file mode 100644
index 0000000000..bd74b25867
--- /dev/null
+++ b/tests/pyccel/scripts/hope_benchmarks/test.json
@@ -0,0 +1,67 @@
+{
+    "exec": "gfortran",
+    "mpi_exec": "mpif90",
+    "language": "fortran",
+    "module_output_flag": "-J",
+    "debug_flags": [
+        "-fcheck=bounds",
+        "-g",
+        "-O0"
+    ],
+    "release_flags": [
+        "-O3",
+        "-funroll-loops"
+    ],
+    "general_flags": [
+        "-fPIC"
+    ],
+    "standard_flags": [
+        "-std=f2003"
+    ],
+    "mpi": {},
+    "openmp": {
+        "flags": [
+            "-fopenmp"
+        ],
+        "libs": [
+            "gomp"
+        ]
+    },
+    "openacc": {
+        "flags": [
+            "-ta=multicore",
+            "-Minfo=accel"
+        ]
+    },
+    "family": "GNU",
+    "libs": [
+        "-lm"
+    ],
+    "python": {
+        "flags": [
+            "-Wsign-compare",
+            "-DNDEBUG",
+            "-g",
+            "-fwrapv",
+            "-O2",
+            "-Wall",
+            "-g",
+            "-fstack-protector-strong",
+            "-fstack-clash-protection",
+            "-Wformat",
+            "-Werror=format-security",
+            "-fcf-protection",
+            "-g",
+            "-fwrapv",
+            "-O2"
+        ],
+        "includes": [
+            "/usr/include/python3.11",
+            "/usr/local/lib/python3.11/dist-packages/numpy/core/include"
+        ],
+        "shared_suffix": ".cpython-311-x86_64-linux-gnu.so",
+        "dependencies": [
+            "/usr/lib/x86_64-linux-gnu/libpython3.11.so"
+        ]
+    }
+}
diff --git a/tests/pyccel/scripts/kernel/host_array.py b/tests/pyccel/scripts/kernel/host_array.py
index cacbcc1da5..e686521e43 100644
--- a/tests/pyccel/scripts/kernel/host_array.py
+++ b/tests/pyccel/scripts/kernel/host_array.py
@@ -1,10 +1,10 @@
 # pylint: disable=missing-function-docstring, missing-module-docstring
 from  pyccel import cuda
+def f():
+    a = cuda.host_empty(10)
 
-a = cuda.host_empty(10)
-
-for i in range(10):
-    a[i] = 1
-
-if __name__ == '__main__':
+    for i in range(10):
+        a[i] = 1
     print(a)
+if __name__ == '__main__':
+    f()
diff --git a/tests/pyccel/scripts/test2.json b/tests/pyccel/scripts/test2.json
new file mode 100644
index 0000000000..bd74b25867
--- /dev/null
+++ b/tests/pyccel/scripts/test2.json
@@ -0,0 +1,67 @@
+{
+    "exec": "gfortran",
+    "mpi_exec": "mpif90",
+    "language": "fortran",
+    "module_output_flag": "-J",
+    "debug_flags": [
+        "-fcheck=bounds",
+        "-g",
+        "-O0"
+    ],
+    "release_flags": [
+        "-O3",
+        "-funroll-loops"
+    ],
+    "general_flags": [
+        "-fPIC"
+    ],
+    "standard_flags": [
+        "-std=f2003"
+    ],
+    "mpi": {},
+    "openmp": {
+        "flags": [
+            "-fopenmp"
+        ],
+        "libs": [
+            "gomp"
+        ]
+    },
+    "openacc": {
+        "flags": [
+            "-ta=multicore",
+            "-Minfo=accel"
+        ]
+    },
+    "family": "GNU",
+    "libs": [
+        "-lm"
+    ],
+    "python": {
+        "flags": [
+            "-Wsign-compare",
+            "-DNDEBUG",
+            "-g",
+            "-fwrapv",
+            "-O2",
+            "-Wall",
+            "-g",
+            "-fstack-protector-strong",
+            "-fstack-clash-protection",
+            "-Wformat",
+            "-Werror=format-security",
+            "-fcf-protection",
+            "-g",
+            "-fwrapv",
+            "-O2"
+        ],
+        "includes": [
+            "/usr/include/python3.11",
+            "/usr/local/lib/python3.11/dist-packages/numpy/core/include"
+        ],
+        "shared_suffix": ".cpython-311-x86_64-linux-gnu.so",
+        "dependencies": [
+            "/usr/lib/x86_64-linux-gnu/libpython3.11.so"
+        ]
+    }
+}
diff --git a/tests/pyccel/test_pyccel.py b/tests/pyccel/test_pyccel.py
index 6e6f1ba519..9b0f0d443e 100644
--- a/tests/pyccel/test_pyccel.py
+++ b/tests/pyccel/test_pyccel.py
@@ -56,27 +56,12 @@ def get_python_output(abs_path, cwd = None):
     return out
 
 #------------------------------------------------------------------------------
-def cat_file(file_path):
-    try:
-        with open(file_path, 'r') as file:
-            content = file.read()
-            print(content)
-    except FileNotFoundError:
-        print(f"Error: The file {file_path} does not exist.")
-    except Exception as e:
-        print(f"An error occurred: {e}")
-
-# Path to the file you want to display
-
 def compile_pyccel(path_dir, test_file, options = ""):
     if "python" in options and "--output" not in options:
         options += " --output=__pyccel__"
     cmd = [shutil.which("pyccel"), test_file]
     if options != "":
         cmd += options.strip().split()
-    file_path = "/__w/pyccel-cuda/pyccel-cuda/tests/pyccel/scripts/kernel/__pyccel__/cuda_ndarrays/cuda_ndarrays.cu"
-    cat_file(file_path)
-    exit(0)
     p = subprocess.Popen(cmd, universal_newlines=True, cwd=path_dir)
     p.wait()
     assert p.returncode==0

From 3c1387b424d99eddc72def20873d20ab9ffc2bc3 Mon Sep 17 00:00:00 2001
From: smazouz42 <smazouz@student.1337.ma>
Date: Thu, 18 Jul 2024 17:57:49 +0100
Subject: [PATCH 080/130] debugging purpose

---
 .../pyccel/scripts/hope_benchmarks/test.json  | 67 -------------------
 tests/pyccel/scripts/test2.json               | 67 -------------------
 2 files changed, 134 deletions(-)
 delete mode 100644 tests/pyccel/scripts/hope_benchmarks/test.json
 delete mode 100644 tests/pyccel/scripts/test2.json

diff --git a/tests/pyccel/scripts/hope_benchmarks/test.json b/tests/pyccel/scripts/hope_benchmarks/test.json
deleted file mode 100644
index bd74b25867..0000000000
--- a/tests/pyccel/scripts/hope_benchmarks/test.json
+++ /dev/null
@@ -1,67 +0,0 @@
-{
-    "exec": "gfortran",
-    "mpi_exec": "mpif90",
-    "language": "fortran",
-    "module_output_flag": "-J",
-    "debug_flags": [
-        "-fcheck=bounds",
-        "-g",
-        "-O0"
-    ],
-    "release_flags": [
-        "-O3",
-        "-funroll-loops"
-    ],
-    "general_flags": [
-        "-fPIC"
-    ],
-    "standard_flags": [
-        "-std=f2003"
-    ],
-    "mpi": {},
-    "openmp": {
-        "flags": [
-            "-fopenmp"
-        ],
-        "libs": [
-            "gomp"
-        ]
-    },
-    "openacc": {
-        "flags": [
-            "-ta=multicore",
-            "-Minfo=accel"
-        ]
-    },
-    "family": "GNU",
-    "libs": [
-        "-lm"
-    ],
-    "python": {
-        "flags": [
-            "-Wsign-compare",
-            "-DNDEBUG",
-            "-g",
-            "-fwrapv",
-            "-O2",
-            "-Wall",
-            "-g",
-            "-fstack-protector-strong",
-            "-fstack-clash-protection",
-            "-Wformat",
-            "-Werror=format-security",
-            "-fcf-protection",
-            "-g",
-            "-fwrapv",
-            "-O2"
-        ],
-        "includes": [
-            "/usr/include/python3.11",
-            "/usr/local/lib/python3.11/dist-packages/numpy/core/include"
-        ],
-        "shared_suffix": ".cpython-311-x86_64-linux-gnu.so",
-        "dependencies": [
-            "/usr/lib/x86_64-linux-gnu/libpython3.11.so"
-        ]
-    }
-}
diff --git a/tests/pyccel/scripts/test2.json b/tests/pyccel/scripts/test2.json
deleted file mode 100644
index bd74b25867..0000000000
--- a/tests/pyccel/scripts/test2.json
+++ /dev/null
@@ -1,67 +0,0 @@
-{
-    "exec": "gfortran",
-    "mpi_exec": "mpif90",
-    "language": "fortran",
-    "module_output_flag": "-J",
-    "debug_flags": [
-        "-fcheck=bounds",
-        "-g",
-        "-O0"
-    ],
-    "release_flags": [
-        "-O3",
-        "-funroll-loops"
-    ],
-    "general_flags": [
-        "-fPIC"
-    ],
-    "standard_flags": [
-        "-std=f2003"
-    ],
-    "mpi": {},
-    "openmp": {
-        "flags": [
-            "-fopenmp"
-        ],
-        "libs": [
-            "gomp"
-        ]
-    },
-    "openacc": {
-        "flags": [
-            "-ta=multicore",
-            "-Minfo=accel"
-        ]
-    },
-    "family": "GNU",
-    "libs": [
-        "-lm"
-    ],
-    "python": {
-        "flags": [
-            "-Wsign-compare",
-            "-DNDEBUG",
-            "-g",
-            "-fwrapv",
-            "-O2",
-            "-Wall",
-            "-g",
-            "-fstack-protector-strong",
-            "-fstack-clash-protection",
-            "-Wformat",
-            "-Werror=format-security",
-            "-fcf-protection",
-            "-g",
-            "-fwrapv",
-            "-O2"
-        ],
-        "includes": [
-            "/usr/include/python3.11",
-            "/usr/local/lib/python3.11/dist-packages/numpy/core/include"
-        ],
-        "shared_suffix": ".cpython-311-x86_64-linux-gnu.so",
-        "dependencies": [
-            "/usr/lib/x86_64-linux-gnu/libpython3.11.so"
-        ]
-    }
-}

From c2044813b9be07863f5496a3597229dca098ef1a Mon Sep 17 00:00:00 2001
From: smazouz42 <smazouz@student.1337.ma>
Date: Fri, 19 Jul 2024 03:43:58 +0100
Subject: [PATCH 081/130] debugging purpose

---
 pyccel/codegen/utilities.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyccel/codegen/utilities.py b/pyccel/codegen/utilities.py
index 056bfaddbf..9ba3dee163 100644
--- a/pyccel/codegen/utilities.py
+++ b/pyccel/codegen/utilities.py
@@ -37,7 +37,7 @@
 # The compile object folder will be in the pyccel dirpath
 internal_libs = {
     "ndarrays"     : ("ndarrays", CompileObj("ndarrays.c",folder="ndarrays")),
-    "cuda_ndarrays": ("cuda_ndarrays", CompileObj("cuda_ndarrays.cu",folder="ndarrays")),
+    "cuda_ndarrays": ("cuda_ndarrays", CompileObj("cuda_ndarrays.cu",folder="cuda_ndarrays")),
     "pyc_math_f90" : ("math", CompileObj("pyc_math_f90.f90",folder="math")),
     "pyc_math_c"   : ("math", CompileObj("pyc_math_c.c",folder="math")),
     "cwrapper"     : ("cwrapper", CompileObj("cwrapper.c",folder="cwrapper", accelerators=('python',))),

From 2517c433d3c9ecac1be83b84ee5e578891d3f592 Mon Sep 17 00:00:00 2001
From: smazouz42 <smazouz@student.1337.ma>
Date: Fri, 19 Jul 2024 03:55:01 +0100
Subject: [PATCH 082/130] debugging purpose

---
 pyccel/codegen/printing/cucode.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pyccel/codegen/printing/cucode.py b/pyccel/codegen/printing/cucode.py
index 1d6d02e7e4..7265b9059b 100644
--- a/pyccel/codegen/printing/cucode.py
+++ b/pyccel/codegen/printing/cucode.py
@@ -193,6 +193,7 @@ def get_declare_type(self, expr):
         rank  = expr.rank
         if not isinstance(class_type, CudaArrayType ) or rank <= 0:
             return super().get_declare_type(expr)
+        self.add_import(c_imports['cuda_ndarrays'])
         self.add_import(c_imports['ndarrays'])
         dtype = 't_ndarray '
         return dtype

From 9778a86d609cf3ee0436640537561241ec9d2922 Mon Sep 17 00:00:00 2001
From: smazouz42 <smazouz@student.1337.ma>
Date: Fri, 19 Jul 2024 04:09:02 +0100
Subject: [PATCH 083/130] debugging purpose

---
 pyccel/codegen/printing/ccode.py            | 2 ++
 pyccel/codegen/printing/cucode.py           | 6 ++----
 pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.h | 4 ----
 3 files changed, 4 insertions(+), 8 deletions(-)

diff --git a/pyccel/codegen/printing/ccode.py b/pyccel/codegen/printing/ccode.py
index 3ca1833d4a..cf571c7eaa 100644
--- a/pyccel/codegen/printing/ccode.py
+++ b/pyccel/codegen/printing/ccode.py
@@ -235,6 +235,7 @@
                  'math',
                  'string',
                  'ndarrays',
+                 'cuda_ndarrays',
                  'complex',
                  'stdint',
                  'pyc_math_c',
@@ -1336,6 +1337,7 @@ def get_declare_type(self, expr):
                 if expr.rank > 15:
                     errors.report(UNSUPPORTED_ARRAY_RANK, symbol=expr, severity='fatal')
                 self.add_import(c_imports['ndarrays'])
+                self.add_import(c_imports['cuda_ndarrays'])
                 dtype = 't_ndarray'
             else:
                 errors.report(PYCCEL_RESTRICTION_TODO+' (rank>0)', symbol=expr, severity='fatal')
diff --git a/pyccel/codegen/printing/cucode.py b/pyccel/codegen/printing/cucode.py
index 7265b9059b..0b599d2716 100644
--- a/pyccel/codegen/printing/cucode.py
+++ b/pyccel/codegen/printing/cucode.py
@@ -19,15 +19,13 @@
 from pyccel.ast.datatypes           import HomogeneousContainerType
 from pyccel.ast.numpytypes          import numpy_precision_map
 from pyccel.ast.cudaext             import CudaFull
+from pyccel.codegen.printing.ccode  import c_imports
+
 
 errors = Errors()
 
 __all__ = ["CudaCodePrinter"]
 
-c_imports = {n : Import(n, Module(n, (), ())) for n in
-                ['cuda_ndarrays',
-                 'ndarrays',
-                 ]}
 
 class CudaCodePrinter(CCodePrinter):
     """
diff --git a/pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.h b/pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.h
index e074443662..e7cbb4581f 100644
--- a/pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.h
+++ b/pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.h
@@ -2,7 +2,6 @@
 # define CUDA_NDARRAYS_H
 
 # include <cuda_runtime.h>
-# include <iostream>
 #include "../ndarrays/ndarrays.h"
 
 
@@ -21,7 +20,4 @@ int32_t cuda_free_host(t_ndarray  arr);
 __host__ __device__
 int32_t cuda_free(t_ndarray  arr);
 
-
-using namespace std;
-
 #endif
\ No newline at end of file

From bc8b5b6415fe3c83615b69dc3506f5e558161079 Mon Sep 17 00:00:00 2001
From: smazouz42 <smazouz@student.1337.ma>
Date: Fri, 19 Jul 2024 04:21:50 +0100
Subject: [PATCH 084/130] debugging purposes

---
 pyccel/codegen/printing/ccode.py | 1 +
 pyccel/codegen/utilities.py      | 1 +
 2 files changed, 2 insertions(+)

diff --git a/pyccel/codegen/printing/ccode.py b/pyccel/codegen/printing/ccode.py
index cf571c7eaa..10cc961966 100644
--- a/pyccel/codegen/printing/ccode.py
+++ b/pyccel/codegen/printing/ccode.py
@@ -571,6 +571,7 @@ def _init_stack_array(self, expr):
                     shape_init, strides_init, len(var.shape), 'false')
         array_init += 'stack_array_init(&{})'.format(self._print(var))
         self.add_import(c_imports['ndarrays'])
+        self.add_import(c_imports['cuda_ndarrays'])
         return buffer_array, array_init
 
     def _handle_inline_func_call(self, expr):
diff --git a/pyccel/codegen/utilities.py b/pyccel/codegen/utilities.py
index 9ba3dee163..4454399b58 100644
--- a/pyccel/codegen/utilities.py
+++ b/pyccel/codegen/utilities.py
@@ -48,6 +48,7 @@
 internal_libs["cwrapper_ndarrays"] = ("cwrapper_ndarrays", CompileObj("cwrapper_ndarrays.c",folder="cwrapper_ndarrays",
                                                              accelerators = ('python',),
                                                              dependencies = (internal_libs["ndarrays"][1],
+                                                                             internal_libs["cuda_ndarrays"][1],
                                                                              internal_libs["cwrapper"][1])))
 
 #==============================================================================

From f2909f4d0f40dd38cc062a5c798449f57e1a9dba Mon Sep 17 00:00:00 2001
From: smazouz42 <smazouz@student.1337.ma>
Date: Fri, 19 Jul 2024 04:33:08 +0100
Subject: [PATCH 085/130] debugging purposes

---
 pyccel/codegen/printing/ccode.py  | 1 +
 pyccel/codegen/printing/cucode.py | 2 +-
 pyccel/codegen/utilities.py       | 1 -
 3 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/pyccel/codegen/printing/ccode.py b/pyccel/codegen/printing/ccode.py
index 10cc961966..7c428e77a7 100644
--- a/pyccel/codegen/printing/ccode.py
+++ b/pyccel/codegen/printing/ccode.py
@@ -1634,6 +1634,7 @@ def _print_Allocate(self, expr):
             elif (expr.status == 'allocated'):
                 free_code += self._print(Deallocate(variable))
             self.add_import(c_imports['ndarrays'])
+            self.add_import(c_imports['cuda_ndarrays'])
             shape = ", ".join(self._print(i) for i in expr.shape)
             if isinstance(variable.class_type, NumpyNDArrayType):
                 #set dtype to the C struct types
diff --git a/pyccel/codegen/printing/cucode.py b/pyccel/codegen/printing/cucode.py
index 0b599d2716..827d32109d 100644
--- a/pyccel/codegen/printing/cucode.py
+++ b/pyccel/codegen/printing/cucode.py
@@ -191,8 +191,8 @@ def get_declare_type(self, expr):
         rank  = expr.rank
         if not isinstance(class_type, CudaArrayType ) or rank <= 0:
             return super().get_declare_type(expr)
-        self.add_import(c_imports['cuda_ndarrays'])
         self.add_import(c_imports['ndarrays'])
+        self.add_import(c_imports['cuda_ndarrays'])
         dtype = 't_ndarray '
         return dtype
 
diff --git a/pyccel/codegen/utilities.py b/pyccel/codegen/utilities.py
index 4454399b58..9ba3dee163 100644
--- a/pyccel/codegen/utilities.py
+++ b/pyccel/codegen/utilities.py
@@ -48,7 +48,6 @@
 internal_libs["cwrapper_ndarrays"] = ("cwrapper_ndarrays", CompileObj("cwrapper_ndarrays.c",folder="cwrapper_ndarrays",
                                                              accelerators = ('python',),
                                                              dependencies = (internal_libs["ndarrays"][1],
-                                                                             internal_libs["cuda_ndarrays"][1],
                                                                              internal_libs["cwrapper"][1])))
 
 #==============================================================================

From 8f61d134b3c21726504041635b1792f219c6e89f Mon Sep 17 00:00:00 2001
From: smazouz42 <smazouz@student.1337.ma>
Date: Fri, 19 Jul 2024 12:14:43 +0100
Subject: [PATCH 086/130] debugging purposes

---
 pyccel/codegen/compiling/compilers.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pyccel/codegen/compiling/compilers.py b/pyccel/codegen/compiling/compilers.py
index d909a5036e..b5bc519dc3 100644
--- a/pyccel/codegen/compiling/compilers.py
+++ b/pyccel/codegen/compiling/compilers.py
@@ -496,6 +496,7 @@ def run_command(cmd, verbose):
 
         with subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                 universal_newlines=True) as p:
+            p.wait()
             out, err = p.communicate()
 
         if verbose and out:

From 7d027205f4cec4f8688a4f6dca78fc175f79be0b Mon Sep 17 00:00:00 2001
From: smazouz42 <smazouz@student.1337.ma>
Date: Fri, 19 Jul 2024 12:32:57 +0100
Subject: [PATCH 087/130] cleaning up my PR

---
 pyccel/codegen/compiling/compilers.py | 2 --
 pyccel/codegen/printing/ccode.py      | 6 ++----
 2 files changed, 2 insertions(+), 6 deletions(-)

diff --git a/pyccel/codegen/compiling/compilers.py b/pyccel/codegen/compiling/compilers.py
index b5bc519dc3..0d496b9e8d 100644
--- a/pyccel/codegen/compiling/compilers.py
+++ b/pyccel/codegen/compiling/compilers.py
@@ -493,10 +493,8 @@ def run_command(cmd, verbose):
         cmd = [os.path.expandvars(c) for c in cmd]
         if verbose:
             print(' '.join(cmd))
-
         with subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                 universal_newlines=True) as p:
-            p.wait()
             out, err = p.communicate()
 
         if verbose and out:
diff --git a/pyccel/codegen/printing/ccode.py b/pyccel/codegen/printing/ccode.py
index 7c428e77a7..7307e47416 100644
--- a/pyccel/codegen/printing/ccode.py
+++ b/pyccel/codegen/printing/ccode.py
@@ -231,11 +231,11 @@
 import_dict = {'omp_lib' : 'omp' }
 
 c_imports = {n : Import(n, Module(n, (), ())) for n in
-                ['stdlib',
+                ['cuda_ndarrays',
+                 'stdlib',
                  'math',
                  'string',
                  'ndarrays',
-                 'cuda_ndarrays',
                  'complex',
                  'stdint',
                  'pyc_math_c',
@@ -571,7 +571,6 @@ def _init_stack_array(self, expr):
                     shape_init, strides_init, len(var.shape), 'false')
         array_init += 'stack_array_init(&{})'.format(self._print(var))
         self.add_import(c_imports['ndarrays'])
-        self.add_import(c_imports['cuda_ndarrays'])
         return buffer_array, array_init
 
     def _handle_inline_func_call(self, expr):
@@ -1634,7 +1633,6 @@ def _print_Allocate(self, expr):
             elif (expr.status == 'allocated'):
                 free_code += self._print(Deallocate(variable))
             self.add_import(c_imports['ndarrays'])
-            self.add_import(c_imports['cuda_ndarrays'])
             shape = ", ".join(self._print(i) for i in expr.shape)
             if isinstance(variable.class_type, NumpyNDArrayType):
                 #set dtype to the C struct types

From a98a6c2930ffb1634440d01d96002bcba6c75cf9 Mon Sep 17 00:00:00 2001
From: Emily Bourne <emily.bourne@epfl.ch>
Date: Fri, 19 Jul 2024 13:44:57 +0200
Subject: [PATCH 088/130] Target failing test

---
 .github/actions/pytest_run_cuda/action.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/actions/pytest_run_cuda/action.yml b/.github/actions/pytest_run_cuda/action.yml
index 46f90552ed..a6f77dec05 100644
--- a/.github/actions/pytest_run_cuda/action.yml
+++ b/.github/actions/pytest_run_cuda/action.yml
@@ -11,7 +11,7 @@ runs:
     - name: Ccuda tests with pytest
       run: |
         # Catch exit 5 (no tests found)
-        python -m pytest -rX ${FLAGS} -m "not (xdist_incompatible or parallel) and cuda ${{ inputs.pytest_mark }}" --ignore=symbolic --ignore=ndarrays 2>&1 | tee s1_outfile.out
+        python -m pytest -rX ${FLAGS} -m "not (xdist_incompatible or parallel) and cuda ${{ inputs.pytest_mark }}" -sxv -k test_host_array --ignore=symbolic --ignore=ndarrays 2>&1 | tee s1_outfile.out
         pyccel-clean
       shell: ${{ inputs.shell_cmd }}
       working-directory: ./tests

From e4fcff4d76117dd31840a5f79cf4636abda8298d Mon Sep 17 00:00:00 2001
From: Emily Bourne <emily.bourne@epfl.ch>
Date: Fri, 19 Jul 2024 13:46:48 +0200
Subject: [PATCH 089/130] Examine compilation output

---
 tests/pyccel/test_pyccel.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tests/pyccel/test_pyccel.py b/tests/pyccel/test_pyccel.py
index 9b0f0d443e..b1a358078d 100644
--- a/tests/pyccel/test_pyccel.py
+++ b/tests/pyccel/test_pyccel.py
@@ -62,7 +62,9 @@ def compile_pyccel(path_dir, test_file, options = ""):
     cmd = [shutil.which("pyccel"), test_file]
     if options != "":
         cmd += options.strip().split()
-    p = subprocess.Popen(cmd, universal_newlines=True, cwd=path_dir)
+    p = subprocess.run(cmd, capture_output = True, universal_newlines=True, cwd=path_dir)
+    print(p.stdout)
+    print(p.stderr)
     p.wait()
     assert p.returncode==0
 

From 73085bda481ead105268640fd55a34263314236a Mon Sep 17 00:00:00 2001
From: Emily Bourne <emily.bourne@epfl.ch>
Date: Fri, 19 Jul 2024 13:47:23 +0200
Subject: [PATCH 090/130] Run pyccel in verbose mode

---
 tests/pyccel/test_pyccel.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/pyccel/test_pyccel.py b/tests/pyccel/test_pyccel.py
index b1a358078d..4ad4e05e17 100644
--- a/tests/pyccel/test_pyccel.py
+++ b/tests/pyccel/test_pyccel.py
@@ -748,7 +748,7 @@ def test_kernel_collision(gpu_available):
 @pytest.mark.cuda
 def test_host_array(gpu_available):
     types = float
-    pyccel_test("scripts/kernel/host_array.py",
+    pyccel_test("scripts/kernel/host_array.py", pyccel_commands = '-v',
             language="cuda", output_dtype=types, execute_code=gpu_available)
 
 #------------------------------------------------------------------------------

From 8215d77205d01b1eb32b230b94c7f36b62ec06e4 Mon Sep 17 00:00:00 2001
From: Emily Bourne <emily.bourne@epfl.ch>
Date: Fri, 19 Jul 2024 13:56:28 +0200
Subject: [PATCH 091/130] Correct flag

---
 tests/pyccel/test_pyccel.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/tests/pyccel/test_pyccel.py b/tests/pyccel/test_pyccel.py
index 4ad4e05e17..e37286c401 100644
--- a/tests/pyccel/test_pyccel.py
+++ b/tests/pyccel/test_pyccel.py
@@ -65,7 +65,6 @@ def compile_pyccel(path_dir, test_file, options = ""):
     p = subprocess.run(cmd, capture_output = True, universal_newlines=True, cwd=path_dir)
     print(p.stdout)
     print(p.stderr)
-    p.wait()
     assert p.returncode==0
 
 #------------------------------------------------------------------------------
@@ -748,7 +747,7 @@ def test_kernel_collision(gpu_available):
 @pytest.mark.cuda
 def test_host_array(gpu_available):
     types = float
-    pyccel_test("scripts/kernel/host_array.py", pyccel_commands = '-v',
+    pyccel_test("scripts/kernel/host_array.py", pyccel_commands = '--verbose',
             language="cuda", output_dtype=types, execute_code=gpu_available)
 
 #------------------------------------------------------------------------------

From 9e75cba250f4b3fbdcc6a9924a924fcc01529e3c Mon Sep 17 00:00:00 2001
From: smazouz42 <smazouz@student.1337.ma>
Date: Fri, 19 Jul 2024 13:17:15 +0100
Subject: [PATCH 092/130] debugging purposes

---
 tests/pyccel/test_pyccel.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tests/pyccel/test_pyccel.py b/tests/pyccel/test_pyccel.py
index e37286c401..88b737e068 100644
--- a/tests/pyccel/test_pyccel.py
+++ b/tests/pyccel/test_pyccel.py
@@ -62,9 +62,9 @@ def compile_pyccel(path_dir, test_file, options = ""):
     cmd = [shutil.which("pyccel"), test_file]
     if options != "":
         cmd += options.strip().split()
-    p = subprocess.run(cmd, capture_output = True, universal_newlines=True, cwd=path_dir)
-    print(p.stdout)
-    print(p.stderr)
+    p = subprocess.Popen(cmd, universal_newlines=True, cwd=path_dir, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+    p.communicate()
+    
     assert p.returncode==0
 
 #------------------------------------------------------------------------------

From f71e741be94be7eddb706b482e38a6ccab204a94 Mon Sep 17 00:00:00 2001
From: smazouz42 <smazouz@student.1337.ma>
Date: Fri, 19 Jul 2024 13:23:05 +0100
Subject: [PATCH 093/130] Examine compilation output

---
 tests/pyccel/test_pyccel.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tests/pyccel/test_pyccel.py b/tests/pyccel/test_pyccel.py
index 88b737e068..e37286c401 100644
--- a/tests/pyccel/test_pyccel.py
+++ b/tests/pyccel/test_pyccel.py
@@ -62,9 +62,9 @@ def compile_pyccel(path_dir, test_file, options = ""):
     cmd = [shutil.which("pyccel"), test_file]
     if options != "":
         cmd += options.strip().split()
-    p = subprocess.Popen(cmd, universal_newlines=True, cwd=path_dir, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
-    p.communicate()
-    
+    p = subprocess.run(cmd, capture_output = True, universal_newlines=True, cwd=path_dir)
+    print(p.stdout)
+    print(p.stderr)
     assert p.returncode==0
 
 #------------------------------------------------------------------------------

From 44f3503ba874e8b37732735c9945f46b5341282d Mon Sep 17 00:00:00 2001
From: Emily Bourne <emily.bourne@epfl.ch>
Date: Fri, 19 Jul 2024 14:26:26 +0200
Subject: [PATCH 094/130] Check files being compiled and existence

---
 pyccel/codegen/compiling/compilers.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/pyccel/codegen/compiling/compilers.py b/pyccel/codegen/compiling/compilers.py
index 0d496b9e8d..9ba44fbb23 100644
--- a/pyccel/codegen/compiling/compilers.py
+++ b/pyccel/codegen/compiling/compilers.py
@@ -329,6 +329,8 @@ def compile_module(self, compile_obj, output_folder, verbose = False):
         verbose : bool
             Indicates whether additional output should be shown.
         """
+        print("Compiling : ", compile_obj.source)
+        print(os.path.exists(compile_obj.source))
         if not compile_obj.has_target_file:
             return
 

From 82f1c695b7185a4ecf539b5d35011228b94f1c1f Mon Sep 17 00:00:00 2001
From: Emily Bourne <emily.bourne@epfl.ch>
Date: Fri, 19 Jul 2024 14:29:54 +0200
Subject: [PATCH 095/130] Examine library copy

---
 pyccel/codegen/utilities.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/pyccel/codegen/utilities.py b/pyccel/codegen/utilities.py
index 9ba3dee163..8553b89195 100644
--- a/pyccel/codegen/utilities.py
+++ b/pyccel/codegen/utilities.py
@@ -112,6 +112,7 @@ def copy_internal_library(lib_folder, pyccel_dirpath, extra_files = None):
     str
         The location that the files were copied to.
     """
+    print("copy_internal_library : ", lib_folder)
     # get lib path (stdlib_path/lib_name or ext_path/lib_name)
     if lib_folder in external_libs:
         lib_path = os.path.join(ext_path, external_libs[lib_folder], lib_folder)
@@ -144,6 +145,10 @@ def copy_internal_library(lib_folder, pyccel_dirpath, extra_files = None):
         if to_create:
             # Copy all files from the source to the destination
             shutil.copytree(lib_path, lib_dest_path)
+            dst_files = [os.path.relpath(os.path.join(root, f), lib_dest_path) \
+                    for root, dirs, files in os.walk(lib_dest_path) \
+                    for f in files if not f.endswith('.lock')]
+            print("Created : ", dst_files)
             # Create any requested extra files
             if extra_files:
                 for filename, contents in extra_files.items():

From b9e5c949fc4bfe52826eb2fe48bbf6249d7bcf55 Mon Sep 17 00:00:00 2001
From: EmilyBourne <louise.bourne@gmail.com>
Date: Mon, 11 Mar 2024 11:41:27 +0100
Subject: [PATCH 096/130] Trigger tests on push to devel or main branch

---
 .github/workflows/anaconda_linux.yml   | 2 +-
 .github/workflows/anaconda_windows.yml | 2 +-
 .github/workflows/intel.yml            | 2 +-
 .github/workflows/linux.yml            | 2 +-
 .github/workflows/macosx.yml           | 2 +-
 .github/workflows/pickle.yml           | 2 +-
 .github/workflows/pickle_wheel.yml     | 2 +-
 .github/workflows/windows.yml          | 2 +-
 8 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/.github/workflows/anaconda_linux.yml b/.github/workflows/anaconda_linux.yml
index 5a5384e5ce..525903a54f 100644
--- a/.github/workflows/anaconda_linux.yml
+++ b/.github/workflows/anaconda_linux.yml
@@ -28,7 +28,7 @@ env:
 jobs:
   Python_version_picker:
     runs-on: ubuntu-latest
-    if: github.event_name != 'push' || github.repository == 'pyccel/pyccel'
+    if: github.event_name != 'push' || github.repository == 'pyccel/pyccel-cuda'
     outputs:
       python_version: ${{ steps.set-python_version.outputs.python_version }}
     steps:
diff --git a/.github/workflows/anaconda_windows.yml b/.github/workflows/anaconda_windows.yml
index 154a4d01e8..0f3f8a04ed 100644
--- a/.github/workflows/anaconda_windows.yml
+++ b/.github/workflows/anaconda_windows.yml
@@ -28,7 +28,7 @@ env:
 jobs:
   Python_version_picker:
     runs-on: windows-latest
-    if: github.event_name != 'push' || github.repository == 'pyccel/pyccel'
+    if: github.event_name != 'push' || github.repository == 'pyccel/pyccel-cuda'
     outputs:
       python_version: ${{ steps.set-python_version.outputs.python_version }}
     steps:
diff --git a/.github/workflows/intel.yml b/.github/workflows/intel.yml
index 977d5f9afd..5f340e1088 100644
--- a/.github/workflows/intel.yml
+++ b/.github/workflows/intel.yml
@@ -29,7 +29,7 @@ env:
 jobs:
   Python_version_picker:
     runs-on: ubuntu-latest
-    if: github.event_name != 'push' || github.repository == 'pyccel/pyccel'
+    if: github.event_name != 'push' || github.repository == 'pyccel/pyccel-cuda'
     outputs:
       python_version: ${{ steps.set-python_version.outputs.python_version }}
     steps:
diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml
index ad39cee725..664ae3aa60 100644
--- a/.github/workflows/linux.yml
+++ b/.github/workflows/linux.yml
@@ -28,7 +28,7 @@ env:
 jobs:
   matrix_prep:
     runs-on: ubuntu-latest
-    if: github.event_name != 'push' || github.repository == 'pyccel/pyccel'
+    if: github.event_name != 'push' || github.repository == 'pyccel/pyccel-cuda'
     outputs:
       matrix: ${{ steps.set-matrix.outputs.matrix }}
     steps:
diff --git a/.github/workflows/macosx.yml b/.github/workflows/macosx.yml
index 4768a64efa..f51041c0b8 100644
--- a/.github/workflows/macosx.yml
+++ b/.github/workflows/macosx.yml
@@ -28,7 +28,7 @@ env:
 jobs:
   Python_version_picker:
     runs-on: macos-latest
-    if: github.event_name != 'push' || github.repository == 'pyccel/pyccel'
+    if: github.event_name != 'push' || github.repository == 'pyccel/pyccel-cuda'
     outputs:
       python_version: ${{ steps.set-python_version.outputs.python_version }}
     steps:
diff --git a/.github/workflows/pickle.yml b/.github/workflows/pickle.yml
index 052028a5cb..cc3864afd2 100644
--- a/.github/workflows/pickle.yml
+++ b/.github/workflows/pickle.yml
@@ -31,7 +31,7 @@ env:
 jobs:
   Python_version_picker:
     runs-on: ubuntu-latest
-    if: github.event_name != 'push' || github.repository == 'pyccel/pyccel'
+    if: github.event_name != 'push' || github.repository == 'pyccel/pyccel-cuda'
     outputs:
       python_version: ${{ steps.set-matrix.outputs.python_version }}
       matrix: ${{ steps.set-matrix.outputs.matrix }}
diff --git a/.github/workflows/pickle_wheel.yml b/.github/workflows/pickle_wheel.yml
index 1dc82af503..718dc13dcc 100644
--- a/.github/workflows/pickle_wheel.yml
+++ b/.github/workflows/pickle_wheel.yml
@@ -28,7 +28,7 @@ env:
 jobs:
   Python_version_picker:
     runs-on: ubuntu-latest
-    if: github.event_name != 'push' || github.repository == 'pyccel/pyccel'
+    if: github.event_name != 'push' || github.repository == 'pyccel/pyccel-cuda'
     outputs:
       python_version: ${{ steps.set-python_version.outputs.python_version }}
     steps:
diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml
index 60c560ffee..827038a279 100644
--- a/.github/workflows/windows.yml
+++ b/.github/workflows/windows.yml
@@ -28,7 +28,7 @@ env:
 jobs:
   Python_version_picker:
     runs-on: windows-latest
-    if: github.event_name != 'push' || github.repository == 'pyccel/pyccel'
+    if: github.event_name != 'push' || github.repository == 'pyccel/pyccel-cuda'
     outputs:
       python_version: ${{ steps.set-python_version.outputs.python_version }}
     steps:

From df24e817053a4a1abf925875acb15727cc9850db Mon Sep 17 00:00:00 2001
From: EmilyBourne <louise.bourne@gmail.com>
Date: Mon, 11 Mar 2024 11:46:33 +0100
Subject: [PATCH 097/130] Add cuda workflow to test cuda developments on CI

---
 .github/actions/coverage_install/action.yml |  2 +-
 .github/actions/linux_install/action.yml    | 10 +--
 .github/actions/pytest_run/action.yml       |  4 +-
 .github/actions/pytest_run_cuda/action.yml  | 17 +++++
 .github/actions/python_install/action.yml   | 17 +++++
 .github/workflows/cuda.yml                  | 83 +++++++++++++++++++++
 ci_tools/bot_messages/show_tests.txt        |  1 +
 ci_tools/bot_tools/bot_funcs.py             | 12 +--
 ci_tools/devel_branch_tests.py              |  1 +
 ci_tools/json_pytest_output.py              |  2 +-
 10 files changed, 135 insertions(+), 14 deletions(-)
 create mode 100644 .github/actions/pytest_run_cuda/action.yml
 create mode 100644 .github/actions/python_install/action.yml
 create mode 100644 .github/workflows/cuda.yml

diff --git a/.github/actions/coverage_install/action.yml b/.github/actions/coverage_install/action.yml
index ac5294e542..5732baee34 100644
--- a/.github/actions/coverage_install/action.yml
+++ b/.github/actions/coverage_install/action.yml
@@ -15,7 +15,7 @@ runs:
     - name: Directory Creation
       run: |
         INSTALL_DIR=$(cd tests; python -c "import pyccel; print(pyccel.__path__[0])")
-        SITE_DIR=$(python -c 'import sysconfig; print(sysconfig.get_paths()["purelib"])')
+        SITE_DIR=$(dirname ${INSTALL_DIR})
         echo -e "import coverage; coverage.process_startup()" > ${SITE_DIR}/pyccel_cov.pth
         echo -e "[run]\nparallel = True\nsource = ${INSTALL_DIR}\ndata_file = $(pwd)/.coverage\n[report]\ninclude = ${INSTALL_DIR}/*\n[xml]\noutput = cobertura.xml" > .coveragerc
         echo "SITE_DIR=${SITE_DIR}" >> $GITHUB_ENV
diff --git a/.github/actions/linux_install/action.yml b/.github/actions/linux_install/action.yml
index 8fb5cd8505..0ef9a69b8e 100644
--- a/.github/actions/linux_install/action.yml
+++ b/.github/actions/linux_install/action.yml
@@ -9,22 +9,22 @@ runs:
       shell: bash
     - name: Install fortran
       run:
-        sudo apt-get install gfortran
+        sudo apt-get install -y gfortran
       shell: bash
     - name: Install LaPack
       run:
-        sudo apt-get install libblas-dev liblapack-dev
+        sudo apt-get install -y libblas-dev liblapack-dev
       shell: bash
     - name: Install MPI
       run: |
-        sudo apt-get install libopenmpi-dev openmpi-bin
+        sudo apt-get install -y libopenmpi-dev openmpi-bin
         echo "MPI_OPTS=--oversubscribe" >> $GITHUB_ENV
       shell: bash
     - name: Install OpenMP
       run:
-        sudo apt-get install libomp-dev libomp5
+        sudo apt-get install -y libomp-dev libomp5
       shell: bash
     - name: Install Valgrind
       run:
-        sudo apt-get install valgrind
+        sudo apt-get install -y valgrind
       shell: bash
diff --git a/.github/actions/pytest_run/action.yml b/.github/actions/pytest_run/action.yml
index 0b6f0f988d..b0bdc31f16 100644
--- a/.github/actions/pytest_run/action.yml
+++ b/.github/actions/pytest_run/action.yml
@@ -51,13 +51,13 @@ runs:
       working-directory: ./tests
       id: pytest_3
     - name: Test Fortran translations
-      run: python -m pytest -n auto -rX ${FLAGS} -m "not (parallel or xdist_incompatible) and not (c or python) ${{ inputs.pytest_mark }}" --ignore=ndarrays 2>&1 | tee s4_outfile.out
+      run: python -m pytest -n auto -rX ${FLAGS} -m "not (parallel or xdist_incompatible) and not (c or python or ccuda) ${{ inputs.pytest_mark }}" --ignore=ndarrays 2>&1 | tee s4_outfile.out
       shell: ${{ inputs.shell_cmd }}
       working-directory: ./tests
       id: pytest_4
     - name: Test multi-file Fortran translations
       run: |
-        python -m pytest -rX ${FLAGS} -m "xdist_incompatible and not parallel and not (c or python) ${{ inputs.pytest_mark }}" --ignore=ndarrays 2>&1 | tee s5_outfile.out
+        python -m pytest -rX ${FLAGS} -m "xdist_incompatible and not parallel and not (c or python or ccuda) ${{ inputs.pytest_mark }}" --ignore=ndarrays 2>&1 | tee s5_outfile.out
         pyccel-clean
       shell: ${{ inputs.shell_cmd }}
       working-directory: ./tests
diff --git a/.github/actions/pytest_run_cuda/action.yml b/.github/actions/pytest_run_cuda/action.yml
new file mode 100644
index 0000000000..52092a6e02
--- /dev/null
+++ b/.github/actions/pytest_run_cuda/action.yml
@@ -0,0 +1,17 @@
+name: 'Pyccel pytest commands generating Ccuda'
+inputs:
+  shell_cmd:
+    description: 'Specifies the shell command (different for anaconda)'
+    required: false
+    default: "bash"
+
+runs:
+  using: "composite"
+  steps:
+    - name: Ccuda tests with pytest
+      run: |
+        # Catch exit 5 (no tests found)
+        sh -c 'python -m pytest -n auto -rx -m "not (parallel or xdist_incompatible) and ccuda" --ignore=symbolic --ignore=ndarrays; ret=$?; [ $ret = 5 ] && exit 0 || exit $ret'
+        pyccel-clean
+      shell: ${{ inputs.shell_cmd }}
+      working-directory: ./tests
diff --git a/.github/actions/python_install/action.yml b/.github/actions/python_install/action.yml
new file mode 100644
index 0000000000..f9b720e3e1
--- /dev/null
+++ b/.github/actions/python_install/action.yml
@@ -0,0 +1,17 @@
+name: 'Python installation commands'
+
+runs:
+  using: "composite"
+  steps:
+    - name: Install python
+      run:
+        sudo apt-get -y install python3-dev
+      shell: bash
+    - name: python as python3
+      run:
+        sudo apt-get -y install python-is-python3
+      shell: bash
+    - name: Install Pip
+      run:
+        sudo apt-get -y install python3-pip
+      shell: bash
diff --git a/.github/workflows/cuda.yml b/.github/workflows/cuda.yml
new file mode 100644
index 0000000000..833ebf5d85
--- /dev/null
+++ b/.github/workflows/cuda.yml
@@ -0,0 +1,83 @@
+name: Cuda unit tests
+
+on:
+  workflow_dispatch:
+    inputs:
+      python_version:
+        required: false
+        type: string
+      ref:
+        required: false
+        type: string
+      check_run_id:
+        required: false
+        type: string
+      pr_repo:
+        required: false
+        type: string
+  push:
+    branches: [devel, main]
+
+env:
+  COMMIT: ${{ inputs.ref || github.event.ref }}
+  PEM: ${{ secrets.BOT_PEM }}
+  GITHUB_RUN_ID: ${{ github.run_id }}
+  GITHUB_CHECK_RUN_ID: ${{ inputs.check_run_id }}
+  PR_REPO: ${{ inputs.pr_repo || github.repository }}
+
+jobs:
+  Cuda:
+
+    runs-on: ubuntu-20.04
+    name: Unit tests
+
+    container: nvidia/cuda:11.7.1-devel-ubuntu20.04
+    steps:
+      - uses: actions/checkout@v3
+        with:
+          ref: ${{ env.COMMIT }}
+          repository: ${{ env.PR_REPO }}
+      - name: Prepare docker
+        run: |
+          apt update && apt install sudo
+          TZ=Europe/France
+          ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
+          DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends tzdata
+        shell: bash
+      - name: Install python (setup-python action doesn't work with containers)
+        uses: ./.github/actions/python_install
+      - name: "Setup"
+        id: token
+        run: |
+          pip install jwt requests
+          python ci_tools/setup_check_run.py cuda
+      - name: CUDA Version
+        run: nvcc --version # cuda install check
+      - name: Install dependencies
+        uses: ./.github/actions/linux_install
+      - name: Install Pyccel with tests
+        run: |
+            PATH=${PATH}:$HOME/.local/bin
+            echo "PATH=${PATH}" >> $GITHUB_ENV
+            python -m pip install --upgrade pip
+            python -m pip install --user .[test]
+        shell: bash
+      - name: Coverage install
+        uses: ./.github/actions/coverage_install
+      - name: Ccuda tests with pytest
+        id: cuda_pytest
+        uses: ./.github/actions/pytest_run_cuda
+      - name: Collect coverage information
+        continue-on-error: True
+        uses: ./.github/actions/coverage_collection
+      - name: Save code coverage report
+        uses: actions/upload-artifact@v3
+        with:
+          name: coverage-artifact
+          path: .coverage
+          retention-days: 1
+      - name: "Post completed"
+        if: always()
+        run:
+          python ci_tools/complete_check_run.py ${{ steps.cuda_pytest.outcome }}
+
diff --git a/ci_tools/bot_messages/show_tests.txt b/ci_tools/bot_messages/show_tests.txt
index adc07e8431..eb15492d2e 100644
--- a/ci_tools/bot_messages/show_tests.txt
+++ b/ci_tools/bot_messages/show_tests.txt
@@ -2,6 +2,7 @@ The following is a list of keywords which can be used to run tests. Tests in bol
 - **linux** : Runs the unit tests on a Linux system.
 - **windows** : Runs the unit tests on a Windows system.
 - **macosx** : Runs the unit tests on a MacOS X system.
+- **cuda** : Runs the cuda unit tests on a Linux system.
 - **coverage** : Runs the unit tests on a Linux system and checks the coverage of the tests.
 - **docs** : Checks if the documentation follows the numpydoc format.
 - **pylint** : Runs pylint on files which are too big to be handled by codacy.
diff --git a/ci_tools/bot_tools/bot_funcs.py b/ci_tools/bot_tools/bot_funcs.py
index 7084a01bb9..1621d1d089 100644
--- a/ci_tools/bot_tools/bot_funcs.py
+++ b/ci_tools/bot_tools/bot_funcs.py
@@ -23,7 +23,8 @@
         'pyccel_lint': '3.8',
         'pylint': '3.8',
         'spelling': '3.8',
-        'windows': '3.8'
+        'windows': '3.8',
+        'cuda': '-'
         }
 
 test_names = {
@@ -40,15 +41,16 @@
         'pyccel_lint': "Pyccel best practices",
         'pylint': "Python linting",
         'spelling': "Spelling verification",
-        'windows': "Unit tests on Windows"
+        'windows': "Unit tests on Windows",
+        'cuda': "Unit tests on Linux with cuda"
         }
 
-test_dependencies = {'coverage':['linux']}
+test_dependencies = {'coverage':['linux', 'cuda']}
 
 tests_with_base = ('coverage', 'docs', 'pyccel_lint', 'pylint')
 
 pr_test_keys = ('linux', 'windows', 'macosx', 'coverage', 'docs', 'pylint',
-                'pyccel_lint', 'spelling')
+                'pyccel_lint', 'spelling', 'cuda')
 
 review_stage_labels = ["needs_initial_review", "Ready_for_review", "Ready_to_merge"]
 
@@ -420,7 +422,7 @@ def is_test_required(self, commit_log, name, key, state):
             True if the test should be run, False otherwise.
         """
         print("Checking : ", name, key)
-        if key in ('linux', 'windows', 'macosx', 'anaconda_linux', 'anaconda_windows', 'intel'):
+        if key in ('linux', 'windows', 'macosx', 'anaconda_linux', 'anaconda_windows', 'intel', 'cuda'):
             has_relevant_change = lambda diff: any((f.startswith('pyccel/') or f.startswith('tests/')) #pylint: disable=unnecessary-lambda-assignment
                                                     and f.endswith('.py') and f != 'pyccel/version.py'
                                                     for f in diff)
diff --git a/ci_tools/devel_branch_tests.py b/ci_tools/devel_branch_tests.py
index 1102ef9e92..ec67b6c49a 100644
--- a/ci_tools/devel_branch_tests.py
+++ b/ci_tools/devel_branch_tests.py
@@ -15,3 +15,4 @@
     bot.run_tests(['anaconda_linux'], '3.10', force_run = True)
     bot.run_tests(['anaconda_windows'], '3.10', force_run = True)
     bot.run_tests(['intel'], '3.9', force_run = True)
+    bot.run_tests(['cuda'], '-', force_run = True)
diff --git a/ci_tools/json_pytest_output.py b/ci_tools/json_pytest_output.py
index 409ae76d72..b84f4a4c09 100644
--- a/ci_tools/json_pytest_output.py
+++ b/ci_tools/json_pytest_output.py
@@ -61,7 +61,7 @@ def     mini_md_summary(title, outcome, failed_tests):
     summary = ""
 
     failed_pattern = re.compile(r".*FAILED.*")
-    languages = ('c', 'fortran', 'python')
+    languages = ('c', 'fortran', 'python', 'cuda')
     pattern = {lang: re.compile(r".*\["+lang+r"\]\ \_.*") for lang in languages}
 
     for i in p_args.tests:

From 31d72476d34224b6e25a1f29f1fbefed5956db54 Mon Sep 17 00:00:00 2001
From: EmilyBourne <louise.bourne@gmail.com>
Date: Mon, 11 Mar 2024 11:41:27 +0100
Subject: [PATCH 098/130] Trigger tests on push to devel or main branch

---
 .github/workflows/deploy.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml
index 9111b47d52..cf52b1c624 100644
--- a/.github/workflows/deploy.yml
+++ b/.github/workflows/deploy.yml
@@ -10,7 +10,7 @@ jobs:
   waitForWorklows:
     name: Wait for workflows
     runs-on: ubuntu-latest
-    if: github.event.workflow_run.head_branch == 'main'
+    if: github.event.workflow_run.head_branch == 'main' && github.repository == 'pyccel/pyccel'
     steps:
       - name: Checkout repository
         uses: actions/checkout@v4

From 17aa0e624d4fcf3819ef710b27024b104c0452c7 Mon Sep 17 00:00:00 2001
From: bauom <40796259+bauom@users.noreply.github.com>
Date: Wed, 28 Feb 2024 18:11:50 +0100
Subject: [PATCH 099/130] [init] Adding CUDA language/compiler and CodePrinter
 (#32)

This PR aims to make the C code compilable using nvcc. The cuda language was added as well as a CudaCodePrinter.

Changes to stdlib:

Wrapped expressions using complex types in an `ifndef __NVCC__` to avoid processing them with the nvcc compiler

---------

Co-authored-by: Mouad Elalj, EmilyBourne
---
 .dict_custom.txt                           |   1 +
 .github/actions/pytest_parallel/action.yml |   4 +-
 .github/actions/pytest_run/action.yml      |   4 +-
 .github/actions/pytest_run_cuda/action.yml |  11 +-
 CHANGELOG.md                               |   6 +
 pyccel/codegen/codegen.py                  |   8 +-
 pyccel/codegen/compiling/compilers.py      |   5 +-
 pyccel/codegen/pipeline.py                 |   5 +-
 pyccel/codegen/printing/cucode.py          |  74 +++++++++++
 pyccel/commands/console.py                 |   2 +-
 pyccel/compilers/default_compilers.py      |  13 +-
 pyccel/naming/__init__.py                  |   4 +-
 pyccel/naming/cudanameclashchecker.py      |  92 ++++++++++++++
 pyccel/stdlib/numpy/numpy_c.c              |   2 +
 pyccel/stdlib/numpy/numpy_c.h              |   2 +
 pytest.ini                                 |   1 +
 tests/conftest.py                          |  11 ++
 tests/epyccel/test_base.py                 | 136 ++++++++++-----------
 18 files changed, 298 insertions(+), 83 deletions(-)
 create mode 100644 pyccel/codegen/printing/cucode.py
 create mode 100644 pyccel/naming/cudanameclashchecker.py

diff --git a/.dict_custom.txt b/.dict_custom.txt
index b9240f6215..161337d33b 100644
--- a/.dict_custom.txt
+++ b/.dict_custom.txt
@@ -110,6 +110,7 @@ Valgrind
 variadic
 subclasses
 oneAPI
+Cuda
 getter
 setter
 bitwise
diff --git a/.github/actions/pytest_parallel/action.yml b/.github/actions/pytest_parallel/action.yml
index c7c77d99c7..f91d84915b 100644
--- a/.github/actions/pytest_parallel/action.yml
+++ b/.github/actions/pytest_parallel/action.yml
@@ -10,8 +10,8 @@ runs:
   steps:
     - name: Test with pytest
       run: |
-        mpiexec -n 4 ${MPI_OPTS} python -m pytest epyccel/test_parallel_epyccel.py -v -m parallel -rXx
-        #mpiexec -n 4 ${MPI_OPTS} python -m pytest epyccel -v -m parallel -rXx
+        mpiexec -n 4 ${MPI_OPTS} python -m pytest epyccel/test_parallel_epyccel.py -v -m "parallel and not cuda" -rXx
+        #mpiexec -n 4 ${MPI_OPTS} python -m pytest epyccel -v -m "parallel and not cuda" -rXx
       shell: ${{ inputs.shell_cmd }}
       working-directory: ./tests
 
diff --git a/.github/actions/pytest_run/action.yml b/.github/actions/pytest_run/action.yml
index b0bdc31f16..451fa39e92 100644
--- a/.github/actions/pytest_run/action.yml
+++ b/.github/actions/pytest_run/action.yml
@@ -51,13 +51,13 @@ runs:
       working-directory: ./tests
       id: pytest_3
     - name: Test Fortran translations
-      run: python -m pytest -n auto -rX ${FLAGS} -m "not (parallel or xdist_incompatible) and not (c or python or ccuda) ${{ inputs.pytest_mark }}" --ignore=ndarrays 2>&1 | tee s4_outfile.out
+      run: python -m pytest -n auto -rX ${FLAGS} -m "not (parallel or xdist_incompatible) and not (c or python or cuda) ${{ inputs.pytest_mark }}" --ignore=ndarrays 2>&1 | tee s4_outfile.out
       shell: ${{ inputs.shell_cmd }}
       working-directory: ./tests
       id: pytest_4
     - name: Test multi-file Fortran translations
       run: |
-        python -m pytest -rX ${FLAGS} -m "xdist_incompatible and not parallel and not (c or python or ccuda) ${{ inputs.pytest_mark }}" --ignore=ndarrays 2>&1 | tee s5_outfile.out
+        python -m pytest -rX ${FLAGS} -m "xdist_incompatible and not parallel and not (c or python or cuda) ${{ inputs.pytest_mark }}" --ignore=ndarrays 2>&1 | tee s5_outfile.out
         pyccel-clean
       shell: ${{ inputs.shell_cmd }}
       working-directory: ./tests
diff --git a/.github/actions/pytest_run_cuda/action.yml b/.github/actions/pytest_run_cuda/action.yml
index 52092a6e02..46f90552ed 100644
--- a/.github/actions/pytest_run_cuda/action.yml
+++ b/.github/actions/pytest_run_cuda/action.yml
@@ -1,4 +1,4 @@
-name: 'Pyccel pytest commands generating Ccuda'
+name: 'Pyccel pytest commands generating Cuda'
 inputs:
   shell_cmd:
     description: 'Specifies the shell command (different for anaconda)'
@@ -11,7 +11,14 @@ runs:
     - name: Ccuda tests with pytest
       run: |
         # Catch exit 5 (no tests found)
-        sh -c 'python -m pytest -n auto -rx -m "not (parallel or xdist_incompatible) and ccuda" --ignore=symbolic --ignore=ndarrays; ret=$?; [ $ret = 5 ] && exit 0 || exit $ret'
+        python -m pytest -rX ${FLAGS} -m "not (xdist_incompatible or parallel) and cuda ${{ inputs.pytest_mark }}" --ignore=symbolic --ignore=ndarrays 2>&1 | tee s1_outfile.out
         pyccel-clean
       shell: ${{ inputs.shell_cmd }}
       working-directory: ./tests
+    - name: Final step
+      if: always()
+      id: status
+      run:
+        python ci_tools/json_pytest_output.py -t "Cuda Test Summary" --tests "Cuda tests:${{ steps.pytest_1.outcome }}:tests/s1_outfile.out"
+                      
+      shell: ${{ inputs.shell_cmd }}
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 7c0dbecd66..18fc7f947d 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,6 +1,12 @@
 # Change Log
 All notable changes to this project will be documented in this file.
 
+## \[Cuda - UNRELEASED\]
+
+### Added
+
+-   #32 : add support for `nvcc` Compiler and `cuda` language as a possible option.
+
 ## \[UNRELEASED\]
 
 ### Added
diff --git a/pyccel/codegen/codegen.py b/pyccel/codegen/codegen.py
index a7a02d7804..33721a48e8 100644
--- a/pyccel/codegen/codegen.py
+++ b/pyccel/codegen/codegen.py
@@ -9,16 +9,18 @@
 from pyccel.codegen.printing.fcode  import FCodePrinter
 from pyccel.codegen.printing.ccode  import CCodePrinter
 from pyccel.codegen.printing.pycode import PythonCodePrinter
+from pyccel.codegen.printing.cucode import CudaCodePrinter
 
 from pyccel.ast.core      import FunctionDef, Interface, ModuleHeader
 from pyccel.utilities.stage import PyccelStage
 
-_extension_registry = {'fortran': 'f90', 'c':'c',  'python':'py'}
-_header_extension_registry = {'fortran': None, 'c':'h',  'python':None}
+_extension_registry = {'fortran': 'f90', 'c':'c',  'python':'py', 'cuda':'cu'}
+_header_extension_registry = {'fortran': None, 'c':'h',  'python':None, 'cuda':'h'}
 printer_registry    = {
                         'fortran':FCodePrinter,
                         'c':CCodePrinter,
-                        'python':PythonCodePrinter
+                        'python':PythonCodePrinter,
+                        'cuda':CudaCodePrinter
                       }
 
 pyccel_stage = PyccelStage()
diff --git a/pyccel/codegen/compiling/compilers.py b/pyccel/codegen/compiling/compilers.py
index c866ee5b1a..d909a5036e 100644
--- a/pyccel/codegen/compiling/compilers.py
+++ b/pyccel/codegen/compiling/compilers.py
@@ -444,7 +444,10 @@ def compile_shared_library(self, compile_obj, output_folder, verbose = False, sh
         # Collect compile information
         exec_cmd, includes, libs_flags, libdirs_flags, m_code = \
                 self._get_compile_components(compile_obj, accelerators)
-        linker_libdirs_flags = ['-Wl,-rpath' if l == '-L' else l for l in libdirs_flags]
+        if self._info['exec'] == 'nvcc':
+            linker_libdirs_flags = ['-Xcompiler' if l == '-L' else f'"-Wl,-rpath,{l}"' for l in libdirs_flags]
+        else:
+            linker_libdirs_flags = ['-Wl,-rpath' if l == '-L' else l for l in libdirs_flags]
 
         flags.insert(0,"-shared")
 
diff --git a/pyccel/codegen/pipeline.py b/pyccel/codegen/pipeline.py
index 14087fb567..eb357fab74 100644
--- a/pyccel/codegen/pipeline.py
+++ b/pyccel/codegen/pipeline.py
@@ -180,9 +180,10 @@ def handle_error(stage):
     if language is None:
         language = 'fortran'
 
-    # Choose Fortran compiler
+    # Choose Default compiler
     if compiler is None:
-        compiler = os.environ.get('PYCCEL_DEFAULT_COMPILER', 'GNU')
+        default_compiler_family = 'nvidia' if language == 'cuda' else 'GNU'
+        compiler = os.environ.get('PYCCEL_DEFAULT_COMPILER', default_compiler_family)
 
     fflags = [] if fflags is None else fflags.split()
     wrapper_flags = [] if wrapper_flags is None else wrapper_flags.split()
diff --git a/pyccel/codegen/printing/cucode.py b/pyccel/codegen/printing/cucode.py
new file mode 100644
index 0000000000..86146b065b
--- /dev/null
+++ b/pyccel/codegen/printing/cucode.py
@@ -0,0 +1,74 @@
+# coding: utf-8
+#------------------------------------------------------------------------------------------#
+# This file is part of Pyccel which is released under MIT License. See the LICENSE file or #
+# go to https://github.com/pyccel/pyccel/blob/master/LICENSE for full license details.     #
+#------------------------------------------------------------------------------------------#
+"""
+Provide tools for generating and handling CUDA code.
+This module is designed to interface Pyccel's Abstract Syntax Tree (AST) with CUDA,
+enabling the direct translation of high-level Pyccel expressions into CUDA code.
+"""
+
+from pyccel.codegen.printing.ccode import CCodePrinter, c_library_headers
+
+from pyccel.ast.core        import Import, Module
+
+from pyccel.errors.errors   import Errors
+
+
+errors = Errors()
+
+__all__ = ["CudaCodePrinter"]
+
+class CudaCodePrinter(CCodePrinter):
+    """
+    Print code in CUDA format.
+
+    This printer converts Pyccel's Abstract Syntax Tree (AST) into strings of CUDA code.
+    Navigation through this file utilizes _print_X functions,
+    as is common with all printers.
+
+    Parameters
+    ----------
+    filename : str
+            The name of the file being pyccelised.
+    prefix_module : str
+            A prefix to be added to the name of the module.
+    """
+    language = "cuda"
+
+    def __init__(self, filename, prefix_module = None):
+
+        errors.set_target(filename)
+
+        super().__init__(filename)
+
+    def _print_Module(self, expr):
+        self.set_scope(expr.scope)
+        self._current_module = expr.name
+        body = ''.join(self._print(i) for i in expr.body)
+
+        global_variables = ''.join(self._print(d) for d in expr.declarations)
+
+        # Print imports last to be sure that all additional_imports have been collected
+        imports = [Import(expr.name, Module(expr.name,(),())), *self._additional_imports.values()]
+        c_headers_imports = ''
+        local_imports = ''
+
+        for imp in imports:
+            if imp.source in c_library_headers:
+                c_headers_imports += self._print(imp)
+            else:
+                local_imports += self._print(imp)
+
+        imports = f'{c_headers_imports}\
+                    extern "C"{{\n\
+                    {local_imports}\
+                    }}'
+
+        code = f'{imports}\n\
+                 {global_variables}\n\
+                 {body}\n'
+
+        self.exit_scope()
+        return code
diff --git a/pyccel/commands/console.py b/pyccel/commands/console.py
index 596c440ec0..fcbec009de 100644
--- a/pyccel/commands/console.py
+++ b/pyccel/commands/console.py
@@ -80,7 +80,7 @@ def pyccel(files=None, mpi=None, openmp=None, openacc=None, output_dir=None, com
     # ... backend compiler options
     group = parser.add_argument_group('Backend compiler options')
 
-    group.add_argument('--language', choices=('fortran', 'c', 'python'), help='Generated language')
+    group.add_argument('--language', choices=('fortran', 'c', 'python', 'cuda'), help='Generated language')
 
     group.add_argument('--compiler', help='Compiler family or json file containing a compiler description {GNU,intel,PGI}')
 
diff --git a/pyccel/compilers/default_compilers.py b/pyccel/compilers/default_compilers.py
index 166085d22e..d47856773c 100644
--- a/pyccel/compilers/default_compilers.py
+++ b/pyccel/compilers/default_compilers.py
@@ -185,6 +185,15 @@
                 },
             'family': 'nvidia',
             }
+#------------------------------------------------------------
+nvcc_info = {'exec'         : 'nvcc',
+             'language'     : 'cuda',
+             'debug_flags'  : ("-g",),
+             'release_flags': ("-O3",),
+             'general_flags': ('--compiler-options', '-fPIC',),
+             'family'       : 'nvidia'
+            }
+
 
 #------------------------------------------------------------
 def change_to_lib_flag(lib):
@@ -288,6 +297,7 @@ def change_to_lib_flag(lib):
 pgfortran_info.update(python_info)
 nvc_info.update(python_info)
 nvfort_info.update(python_info)
+nvcc_info.update(python_info)
 
 available_compilers = {('GNU', 'c') : gcc_info,
                        ('GNU', 'fortran') : gfort_info,
@@ -296,6 +306,7 @@ def change_to_lib_flag(lib):
                        ('PGI', 'c') : pgcc_info,
                        ('PGI', 'fortran') : pgfortran_info,
                        ('nvidia', 'c') : nvc_info,
-                       ('nvidia', 'fortran') : nvfort_info}
+                       ('nvidia', 'fortran') : nvfort_info,
+                       ('nvidia', 'cuda'): nvcc_info}
 
 vendors = ('GNU','intel','PGI','nvidia')
diff --git a/pyccel/naming/__init__.py b/pyccel/naming/__init__.py
index 72c318d3ad..b3e4bbbe0e 100644
--- a/pyccel/naming/__init__.py
+++ b/pyccel/naming/__init__.py
@@ -10,7 +10,9 @@
 from .fortrannameclashchecker import FortranNameClashChecker
 from .cnameclashchecker import CNameClashChecker
 from .pythonnameclashchecker import PythonNameClashChecker
+from .cudanameclashchecker import CudaNameClashChecker
 
 name_clash_checkers = {'fortran':FortranNameClashChecker(),
         'c':CNameClashChecker(),
-        'python':PythonNameClashChecker()}
+        'python':PythonNameClashChecker(),
+        'cuda':CudaNameClashChecker()}
diff --git a/pyccel/naming/cudanameclashchecker.py b/pyccel/naming/cudanameclashchecker.py
new file mode 100644
index 0000000000..971204e912
--- /dev/null
+++ b/pyccel/naming/cudanameclashchecker.py
@@ -0,0 +1,92 @@
+# coding: utf-8
+#------------------------------------------------------------------------------------------#
+# This file is part of Pyccel which is released under MIT License. See the LICENSE file or #
+# go to https://github.com/pyccel/pyccel/blob/master/LICENSE for full license details.     #
+#------------------------------------------------------------------------------------------#
+"""
+Handles name clash problems in Cuda
+"""
+from .languagenameclashchecker import LanguageNameClashChecker
+
+class CudaNameClashChecker(LanguageNameClashChecker):
+    """
+    Class containing functions to help avoid problematic names in Cuda.
+
+    A class which provides functionalities to check or propose variable names and
+    verify that they do not cause name clashes. Name clashes may be due to
+    new variables, or due to the use of reserved keywords.
+    """
+    # C keywords (https://en.cppreference.com/w/c/keyword) plus helper names from the C support code
+    keywords = set(['isign', 'fsign', 'csign', 'auto', 'break', 'case', 'char', 'const',
+        'continue', 'default', 'do', 'double', 'else', 'enum',
+        'extern', 'float', 'for', 'goto', 'if', 'inline', 'int',
+        'long', 'register', 'restrict', 'return', 'short', 'signed',
+        'sizeof', 'static', 'struct', 'switch', 'typedef', 'union',
+        'unsigned', 'void', 'volatile', 'while', '_Alignas',
+        '_Alignof', '_Atomic', '_Bool', '_Complex', '_Decimal128',
+        '_Decimal32', '_Decimal64', '_Generic', '_Imaginary',
+        '_Noreturn', '_Static_assert', '_Thread_local', 't_ndarray',
+        'array_create', 'new_slice', 'array_slicing', 'alias_assign',
+        'transpose_alias_assign', 'array_fill', 't_slice',
+        'GET_INDEX_EXP1', 'GET_INDEX_EXP2',
+        'GET_INDEX_EXP3', 'GET_INDEX_EXP4', 'GET_INDEX_EXP5',
+        'GET_INDEX_EXP6', 'GET_INDEX_EXP7', 'GET_INDEX_EXP8',
+        'GET_INDEX_EXP9', 'GET_INDEX_EXP10', 'GET_INDEX_EXP11',
+        'GET_INDEX_EXP12', 'GET_INDEX_EXP13', 'GET_INDEX_EXP14',
+        'GET_INDEX_EXP15', 'NUM_ARGS_H1', 'NUM_ARGS',
+        'GET_INDEX_FUNC_H2', 'GET_INDEX_FUNC', 'GET_INDEX',
+        'INDEX', 'GET_ELEMENT', 'free_array', 'free_pointer',
+        'get_index', 'numpy_to_ndarray_strides',
+        'numpy_to_ndarray_shape', 'get_size', 'order_f', 'order_c', 'array_copy_data'])
+
+    def has_clash(self, name, symbols):
+        """
+        Indicate whether the proposed name causes any clashes.
+
+        Checks if a suggested name conflicts with predefined
+        keywords or specified symbols, returning True for a clash.
+        This method is crucial for maintaining namespace integrity and
+        preventing naming conflicts in code generation processes.
+
+        Parameters
+        ----------
+        name : str
+            The suggested name.
+        symbols : set
+            Symbols which should be considered as collisions.
+
+        Returns
+        -------
+        bool
+            True if the name is a collision.
+            False if the name is collision free.
+        """
+        return any(name == k for k in self.keywords) or \
+               any(name == s for s in symbols)
+
+    def get_collisionless_name(self, name, symbols):
+        """
+        Get a valid name which doesn't collide with symbols or Cuda keywords.
+
+        Find a new name based on the suggested name which will not cause
+        conflicts with Cuda keywords, does not appear in the provided symbols,
+        and is a valid name in Cuda code.
+
+        Parameters
+        ----------
+        name : str
+            The suggested name.
+        symbols : set
+            Symbols which should be considered as collisions.
+
+        Returns
+        -------
+        str
+            A new name which is collision free.
+        """
+        if len(name)>4 and all(name[i] == '_' for i in (0,1,-1,-2)):
+            # Ignore magic methods
+            return name
+        if name[0] == '_':
+            name = 'private'+name
+        return self._get_collisionless_name(name, symbols)
diff --git a/pyccel/stdlib/numpy/numpy_c.c b/pyccel/stdlib/numpy/numpy_c.c
index 7c9ecbbf6b..bc56214772 100644
--- a/pyccel/stdlib/numpy/numpy_c.c
+++ b/pyccel/stdlib/numpy/numpy_c.c
@@ -17,8 +17,10 @@ double  fsign(double x)
     return SIGN(x);
 }
 
+#ifndef __NVCC__
 /* numpy.sign for complex */
 double complex csign(double complex x)
 {
     return x ? ((!creal(x) && cimag(x) < 0) || (creal(x) < 0) ? -1 : 1) : 0;
 }
+#endif
diff --git a/pyccel/stdlib/numpy/numpy_c.h b/pyccel/stdlib/numpy/numpy_c.h
index e72cf3ad57..c2a16a5516 100644
--- a/pyccel/stdlib/numpy/numpy_c.h
+++ b/pyccel/stdlib/numpy/numpy_c.h
@@ -15,6 +15,8 @@
 
 long long int isign(long long int x);
 double fsign(double x);
+#ifndef __NVCC__
 double complex csign(double complex x);
+#endif
 
 #endif
diff --git a/pytest.ini b/pytest.ini
index 42eb0d72ba..3792ab65f9 100644
--- a/pytest.ini
+++ b/pytest.ini
@@ -9,3 +9,4 @@ markers =
     python: test to generate python code
     xdist_incompatible: test which compiles a file also compiled by another test
     external: test using an external dll (problematic with conda on Windows)
+    cuda: test to generate cuda code
diff --git a/tests/conftest.py b/tests/conftest.py
index 79144b6978..a5082ef6e8 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -21,6 +21,17 @@
 def language(request):
     return request.param
 
+@pytest.fixture( params=[
+        pytest.param("fortran", marks = pytest.mark.fortran),
+        pytest.param("c", marks = pytest.mark.c),
+        pytest.param("python", marks = pytest.mark.python),
+        pytest.param("cuda", marks = pytest.mark.cuda)
+    ],
+    scope = "session"
+)
+def language_with_cuda(request):
+    return request.param
+
 def move_coverage(path_dir):
     for root, _, files in os.walk(path_dir):
         for name in files:
diff --git a/tests/epyccel/test_base.py b/tests/epyccel/test_base.py
index c22064d321..413f79eef1 100644
--- a/tests/epyccel/test_base.py
+++ b/tests/epyccel/test_base.py
@@ -7,128 +7,128 @@
 from utilities import epyccel_test
 
 
-def test_is_false(language):
-    test = epyccel_test(base.is_false, lang=language)
+def test_is_false(language_with_cuda):
+    test = epyccel_test(base.is_false, lang=language_with_cuda)
     test.compare_epyccel( True )
     test.compare_epyccel( False )
 
-def test_is_true(language):
-    test = epyccel_test(base.is_true, lang=language)
+def test_is_true(language_with_cuda):
+    test = epyccel_test(base.is_true, lang=language_with_cuda)
     test.compare_epyccel( True )
     test.compare_epyccel( False )
 
-def test_compare_is(language):
-    test = epyccel_test(base.compare_is, lang=language)
+def test_compare_is(language_with_cuda):
+    test = epyccel_test(base.compare_is, lang=language_with_cuda)
     test.compare_epyccel( True, True )
     test.compare_epyccel( True, False )
     test.compare_epyccel( False, True )
     test.compare_epyccel( False, False )
 
-def test_compare_is_not(language):
-    test = epyccel_test(base.compare_is_not, lang=language)
+def test_compare_is_not(language_with_cuda):
+    test = epyccel_test(base.compare_is_not, lang=language_with_cuda)
     test.compare_epyccel( True, True )
     test.compare_epyccel( True, False )
     test.compare_epyccel( False, True )
     test.compare_epyccel( False, False )
 
-def test_compare_is_int(language):
-    test = epyccel_test(base.compare_is_int, lang=language)
+def test_compare_is_int(language_with_cuda):
+    test = epyccel_test(base.compare_is_int, lang=language_with_cuda)
     test.compare_epyccel( True, 1 )
     test.compare_epyccel( True, 0 )
     test.compare_epyccel( False, 1 )
     test.compare_epyccel( False, 0 )
 
-def test_compare_is_not_int(language):
-    test = epyccel_test(base.compare_is_not_int, lang=language)
+def test_compare_is_not_int(language_with_cuda):
+    test = epyccel_test(base.compare_is_not_int, lang=language_with_cuda)
     test.compare_epyccel( True, 1 )
     test.compare_epyccel( True, 0 )
     test.compare_epyccel( False, 1 )
     test.compare_epyccel( False, 0 )
 
-def test_not_false(language):
-    test = epyccel_test(base.not_false, lang=language)
+def test_not_false(language_with_cuda):
+    test = epyccel_test(base.not_false, lang=language_with_cuda)
     test.compare_epyccel( True )
     test.compare_epyccel( False )
 
-def test_not_true(language):
-    test = epyccel_test(base.not_true, lang=language)
+def test_not_true(language_with_cuda):
+    test = epyccel_test(base.not_true, lang=language_with_cuda)
     test.compare_epyccel( True )
     test.compare_epyccel( False )
 
-def test_eq_false(language):
-    test = epyccel_test(base.eq_false, lang=language)
+def test_eq_false(language_with_cuda):
+    test = epyccel_test(base.eq_false, lang=language_with_cuda)
     test.compare_epyccel( True )
     test.compare_epyccel( False )
 
-def test_eq_true(language):
-    test = epyccel_test(base.eq_true, lang=language)
+def test_eq_true(language_with_cuda):
+    test = epyccel_test(base.eq_true, lang=language_with_cuda)
     test.compare_epyccel( True )
     test.compare_epyccel( False )
 
-def test_neq_false(language):
-    test = epyccel_test(base.eq_false, lang=language)
+def test_neq_false(language_with_cuda):
+    test = epyccel_test(base.eq_false, lang=language_with_cuda)
     test.compare_epyccel( True )
     test.compare_epyccel( False )
 
-def test_neq_true(language):
-    test = epyccel_test(base.eq_true, lang=language)
+def test_neq_true(language_with_cuda):
+    test = epyccel_test(base.eq_true, lang=language_with_cuda)
     test.compare_epyccel( True )
     test.compare_epyccel( False )
 
-def test_not(language):
-    test = epyccel_test(base.not_val, lang=language)
+def test_not(language_with_cuda):
+    test = epyccel_test(base.not_val, lang=language_with_cuda)
     test.compare_epyccel( True )
     test.compare_epyccel( False )
 
-def test_not_int(language):
-    test = epyccel_test(base.not_int, lang=language)
+def test_not_int(language_with_cuda):
+    test = epyccel_test(base.not_int, lang=language_with_cuda)
     test.compare_epyccel( 0 )
     test.compare_epyccel( 4 )
 
-def test_compare_is_nil(language):
-    test = epyccel_test(base.is_nil, lang=language)
+def test_compare_is_nil(language_with_cuda):
+    test = epyccel_test(base.is_nil, lang=language_with_cuda)
     test.compare_epyccel( None )
 
-def test_compare_is_not_nil(language):
-    test = epyccel_test(base.is_not_nil, lang=language)
+def test_compare_is_not_nil(language_with_cuda):
+    test = epyccel_test(base.is_not_nil, lang=language_with_cuda)
     test.compare_epyccel( None )
 
-def test_cast_int(language):
-    test = epyccel_test(base.cast_int, lang=language)
+def test_cast_int(language_with_cuda):
+    test = epyccel_test(base.cast_int, lang=language_with_cuda)
     test.compare_epyccel( 4 )
-    test = epyccel_test(base.cast_float_to_int, lang=language)
+    test = epyccel_test(base.cast_float_to_int, lang=language_with_cuda)
     test.compare_epyccel( 4.5 )
 
-def test_cast_bool(language):
-    test = epyccel_test(base.cast_bool, lang=language)
+def test_cast_bool(language_with_cuda):
+    test = epyccel_test(base.cast_bool, lang=language_with_cuda)
     test.compare_epyccel( True )
 
-def test_cast_float(language):
-    test = epyccel_test(base.cast_float, lang=language)
+def test_cast_float(language_with_cuda):
+    test = epyccel_test(base.cast_float, lang=language_with_cuda)
     test.compare_epyccel( 4.5 )
-    test = epyccel_test(base.cast_int_to_float, lang=language)
+    test = epyccel_test(base.cast_int_to_float, lang=language_with_cuda)
     test.compare_epyccel( 4 )
 
-def test_if_0_int(language):
-    test = epyccel_test(base.if_0_int, lang=language)
+def test_if_0_int(language_with_cuda):
+    test = epyccel_test(base.if_0_int, lang=language_with_cuda)
     test.compare_epyccel( 22 )
     test.compare_epyccel( 0 )
 
-def test_if_0_real(language):
-    test = epyccel_test(base.if_0_real, lang=language)
+def test_if_0_real(language_with_cuda):
+    test = epyccel_test(base.if_0_real, lang=language_with_cuda)
     test.compare_epyccel( 22.3 )
     test.compare_epyccel( 0.0 )
 
-def test_same_int(language):
-    test = epyccel_test(base.is_same_int, lang=language)
+def test_same_int(language_with_cuda):
+    test = epyccel_test(base.is_same_int, lang=language_with_cuda)
     test.compare_epyccel( 22 )
-    test = epyccel_test(base.isnot_same_int, lang=language)
+    test = epyccel_test(base.isnot_same_int, lang=language_with_cuda)
     test.compare_epyccel( 22 )
 
-def test_same_float(language):
-    test = epyccel_test(base.is_same_float, lang=language)
+def test_same_float(language_with_cuda):
+    test = epyccel_test(base.is_same_float, lang=language_with_cuda)
     test.compare_epyccel( 22.2 )
-    test = epyccel_test(base.isnot_same_float, lang=language)
+    test = epyccel_test(base.isnot_same_float, lang=language_with_cuda)
     test.compare_epyccel( 22.2 )
 
 @pytest.mark.parametrize( 'language', [
@@ -150,28 +150,28 @@ def test_same_complex(language):
     test = epyccel_test(base.isnot_same_complex, lang=language)
     test.compare_epyccel( complex(2,3) )
 
-def test_is_types(language):
-    test = epyccel_test(base.is_types, lang=language)
+def test_is_types(language_with_cuda):
+    test = epyccel_test(base.is_types, lang=language_with_cuda)
     test.compare_epyccel( 1, 1.0 )
 
-def test_isnot_types(language):
-    test = epyccel_test(base.isnot_types, lang=language)
+def test_isnot_types(language_with_cuda):
+    test = epyccel_test(base.isnot_types, lang=language_with_cuda)
     test.compare_epyccel( 1, 1.0 )
 
-def test_none_is_none(language):
-    test = epyccel_test(base.none_is_none, lang=language)
+def test_none_is_none(language_with_cuda):
+    test = epyccel_test(base.none_is_none, lang=language_with_cuda)
     test.compare_epyccel()
 
-def test_none_isnot_none(language):
-    test = epyccel_test(base.none_isnot_none, lang=language)
+def test_none_isnot_none(language_with_cuda):
+    test = epyccel_test(base.none_isnot_none, lang=language_with_cuda)
     test.compare_epyccel()
 
-def test_pass_if(language):
-    test = epyccel_test(base.pass_if, lang=language)
+def test_pass_if(language_with_cuda):
+    test = epyccel_test(base.pass_if, lang=language_with_cuda)
     test.compare_epyccel(2)
 
-def test_pass2_if(language):
-    test = epyccel_test(base.pass2_if, lang=language)
+def test_pass2_if(language_with_cuda):
+    test = epyccel_test(base.pass2_if, lang=language_with_cuda)
     test.compare_epyccel(0.2)
     test.compare_epyccel(0.0)
 
@@ -192,15 +192,15 @@ def test_use_optional(language):
     test.compare_epyccel()
     test.compare_epyccel(6)
 
-def test_none_equality(language):
-    test = epyccel_test(base.none_equality, lang=language)
+def test_none_equality(language_with_cuda):
+    test = epyccel_test(base.none_equality, lang=language_with_cuda)
     test.compare_epyccel()
     test.compare_epyccel(6)
 
-def test_none_none_equality(language):
-    test = epyccel_test(base.none_none_equality, lang=language)
+def test_none_none_equality(language_with_cuda):
+    test = epyccel_test(base.none_none_equality, lang=language_with_cuda)
     test.compare_epyccel()
 
-def test_none_literal_equality(language):
-    test = epyccel_test(base.none_literal_equality, lang=language)
+def test_none_literal_equality(language_with_cuda):
+    test = epyccel_test(base.none_literal_equality, lang=language_with_cuda)
     test.compare_epyccel()

From 2c58573886bbd50fc6c715f66de673c743ca2af5 Mon Sep 17 00:00:00 2001
From: Said Mazouz <95222894+smazouz42@users.noreply.github.com>
Date: Wed, 15 May 2024 12:58:50 +0100
Subject: [PATCH 100/130] Fix import handling (#49)

This pull request fixes https://github.com/pyccel/pyccel-cuda/issues/48 by implementing a tiny wrapper for CUDA, and by wrapping only the non-CUDA (C) functionality with `extern "C"`.

**Commit Summary**

-    Implemented new header printer for CUDA.
-    Added CUDA wrapper assignment
-    Instead of wrapping all local headers, wrap only C functions with extern 'C'

---------

Co-authored-by: EmilyBourne <louise.bourne@gmail.com>
Co-authored-by: bauom <40796259+bauom@users.noreply.github.com>
---
 CHANGELOG.md                                |  3 +-
 pyccel/codegen/printing/cucode.py           | 45 ++++++++----
 pyccel/codegen/python_wrapper.py            |  4 ++
 pyccel/codegen/wrapper/cuda_to_c_wrapper.py | 78 +++++++++++++++++++++
 tests/epyccel/modules/cuda_module.py        | 13 ++++
 tests/epyccel/test_epyccel_modules.py       | 13 ++++
 6 files changed, 142 insertions(+), 14 deletions(-)
 create mode 100644 pyccel/codegen/wrapper/cuda_to_c_wrapper.py
 create mode 100644 tests/epyccel/modules/cuda_module.py

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 18fc7f947d..72a8f22ded 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,7 +5,8 @@ All notable changes to this project will be documented in this file.
 
 ### Added
 
--   #32 : add support for `nvcc` Compiler and `cuda` language as a possible option.
+-   #32 : Add support for `nvcc` Compiler and `cuda` language as a possible option.
+-   #48 : Fix incorrect handling of imports in `cuda`.
 
 ## \[UNRELEASED\]
 
diff --git a/pyccel/codegen/printing/cucode.py b/pyccel/codegen/printing/cucode.py
index 86146b065b..277d2a3a6a 100644
--- a/pyccel/codegen/printing/cucode.py
+++ b/pyccel/codegen/printing/cucode.py
@@ -52,19 +52,7 @@ def _print_Module(self, expr):
 
         # Print imports last to be sure that all additional_imports have been collected
         imports = [Import(expr.name, Module(expr.name,(),())), *self._additional_imports.values()]
-        c_headers_imports = ''
-        local_imports = ''
-
-        for imp in imports:
-            if imp.source in c_library_headers:
-                c_headers_imports += self._print(imp)
-            else:
-                local_imports += self._print(imp)
-
-        imports = f'{c_headers_imports}\
-                    extern "C"{{\n\
-                    {local_imports}\
-                    }}'
+        imports = ''.join(self._print(i) for i in imports)
 
         code = f'{imports}\n\
                  {global_variables}\n\
@@ -72,3 +60,34 @@ def _print_Module(self, expr):
 
         self.exit_scope()
         return code
+
+    def _print_ModuleHeader(self, expr):
+        self.set_scope(expr.module.scope)
+        self._in_header = True
+        name = expr.module.name
+
+        funcs = ""
+        cuda_headers = ""
+        for f in expr.module.funcs:
+            if not f.is_inline:
+                if 'kernel' in f.decorators:  # Checking for 'kernel' decorator
+                    cuda_headers += self.function_signature(f) + ';\n'
+                else:
+                    funcs += self.function_signature(f) + ';\n'
+        global_variables = ''.join('extern '+self._print(d) for d in expr.module.declarations if not d.variable.is_private)
+        # Print imports last to be sure that all additional_imports have been collected
+        imports = [*expr.module.imports, *self._additional_imports.values()]
+        imports = ''.join(self._print(i) for i in imports)
+
+        self._in_header = False
+        self.exit_scope()
+        function_declaration = f'{cuda_headers}\n\
+                    extern "C"{{\n\
+                    {funcs}\
+                    }}\n'
+        return '\n'.join((f"#ifndef {name.upper()}_H",
+                          f"#define {name.upper()}_H",
+                          global_variables,
+                          function_declaration,
+                          "#endif // {name.upper()}_H\n"))
+
diff --git a/pyccel/codegen/python_wrapper.py b/pyccel/codegen/python_wrapper.py
index 9437727042..62c303fa64 100644
--- a/pyccel/codegen/python_wrapper.py
+++ b/pyccel/codegen/python_wrapper.py
@@ -13,6 +13,7 @@
 from pyccel.codegen.printing.fcode               import FCodePrinter
 from pyccel.codegen.wrapper.fortran_to_c_wrapper import FortranToCWrapper
 from pyccel.codegen.wrapper.c_to_python_wrapper  import CToPythonWrapper
+from pyccel.codegen.wrapper.cuda_to_c_wrapper    import CudaToCWrapper
 from pyccel.codegen.utilities                    import recompile_object
 from pyccel.codegen.utilities                    import copy_internal_library
 from pyccel.codegen.utilities                    import internal_libs
@@ -144,6 +145,9 @@ def create_shared_library(codegen,
                 verbose=verbose)
         timings['Bind C wrapping'] = time.time() - start_bind_c_compiling
         c_ast = bind_c_mod
+    elif language == 'cuda':
+        wrapper = CudaToCWrapper()
+        c_ast = wrapper.wrap(codegen.ast)
     else:
         c_ast = codegen.ast
 
diff --git a/pyccel/codegen/wrapper/cuda_to_c_wrapper.py b/pyccel/codegen/wrapper/cuda_to_c_wrapper.py
new file mode 100644
index 0000000000..c0e24c7c09
--- /dev/null
+++ b/pyccel/codegen/wrapper/cuda_to_c_wrapper.py
@@ -0,0 +1,78 @@
+# coding: utf-8
+#------------------------------------------------------------------------------------------#
+# This file is part of Pyccel which is released under MIT License. See the LICENSE file or #
+# go to https://github.com/pyccel/pyccel/blob/master/LICENSE for full license details.     #
+#------------------------------------------------------------------------------------------#
+"""
+Module describing the code-wrapping class : CudaToCWrapper
+which creates an interface exposing Cuda code to C.
+"""
+
+from pyccel.ast.bind_c      import BindCModule
+from pyccel.errors.errors   import Errors
+from pyccel.ast.bind_c      import BindCVariable
+from .wrapper               import Wrapper
+
+errors = Errors()
+
+class CudaToCWrapper(Wrapper):
+    """
+    Class for creating a wrapper exposing Cuda code to C.
+
+    While CUDA is typically compatible with C by default,
+    this wrapper becomes necessary in scenarios where specific adaptations
+    or modifications are required to ensure seamless integration with C.
+    """
+
+    def _wrap_Module(self, expr):
+        """
+        Create a Module which is compatible with C.
+
+        Create a Module which provides an interface between C and the
+        Module described by expr.
+
+        Parameters
+        ----------
+        expr : pyccel.ast.core.Module
+            The module to be wrapped.
+
+        Returns
+        -------
+        pyccel.ast.core.BindCModule
+            The C-compatible module.
+        """
+        init_func = expr.init_func
+        if expr.interfaces:
+            errors.report("Interface wrapping is not yet supported for Cuda",
+                      severity='warning', symbol=expr)
+        if expr.classes:
+            errors.report("Class wrapping is not yet supported for Cuda",
+                      severity='warning', symbol=expr)
+
+        variables = [self._wrap(v) for v in expr.variables]
+
+        return BindCModule(expr.name, variables, expr.funcs,
+                init_func=init_func,
+                scope = expr.scope,
+                original_module=expr)
+
+    def _wrap_Variable(self, expr):
+        """
+        Create all objects necessary to expose a module variable to C.
+
+        Create and return the objects which must be printed in the wrapping
+        module in order to expose the variable to C
+
+        Parameters
+        ----------
+        expr : pyccel.ast.variables.Variable
+            The module variable.
+
+        Returns
+        -------
+        pyccel.ast.bind_c.BindCVariable
+            The C-compatible variable which must be printed in
+            the wrapping module to expose the variable.
+        """
+        return expr.clone(expr.name, new_class = BindCVariable)
+
diff --git a/tests/epyccel/modules/cuda_module.py b/tests/epyccel/modules/cuda_module.py
new file mode 100644
index 0000000000..bb7ae6b98a
--- /dev/null
+++ b/tests/epyccel/modules/cuda_module.py
@@ -0,0 +1,13 @@
+# pylint: disable=missing-function-docstring, missing-module-docstring
+import numpy as np
+
+g = np.float64(9.81)
+r0 = np.float32(1.0)
+rmin = 0.01
+rmax = 1.0
+
+skip_centre = True
+
+method = 3
+
+tiny = np.int32(4)
diff --git a/tests/epyccel/test_epyccel_modules.py b/tests/epyccel/test_epyccel_modules.py
index ad8ae0bd75..223f741bf0 100644
--- a/tests/epyccel/test_epyccel_modules.py
+++ b/tests/epyccel/test_epyccel_modules.py
@@ -200,3 +200,16 @@ def test_awkward_names(language):
     assert mod.function() == modnew.function()
     assert mod.pure() == modnew.pure()
     assert mod.allocate(1) == modnew.allocate(1)
+
+def test_cuda_module(language_with_cuda):
+    import modules.cuda_module as mod
+
+    modnew = epyccel(mod, language=language_with_cuda)
+
+    atts = ('g', 'r0', 'rmin', 'rmax', 'skip_centre',
+            'method', 'tiny')
+    for att in atts:
+        mod_att = getattr(mod, att)
+        modnew_att = getattr(modnew, att)
+        assert mod_att == modnew_att
+        assert type(mod_att) is type(modnew_att)

From 0d154f8466a6faba4785c9eb644de33b86b9b300 Mon Sep 17 00:00:00 2001
From: Said Mazouz <95222894+smazouz42@users.noreply.github.com>
Date: Thu, 27 Jun 2024 20:31:46 +0100
Subject: [PATCH 101/130] Add support for kernels (#42)

This pull request addresses issue #28 by implementing a new feature in
Pyccel that allows users to define custom GPU kernels. The syntax for
creating these kernels is inspired by Numba. Issue #45 is also fixed
here, as the fix is needed for testing purposes.

**Commit Summary**

- Introduced KernelCall class
- Added cuda printer methods _print_KernelCall and function_signature to
generate the corresponding CUDA representation for both kernel calls and
definitions
- Added IndexedFunctionCall, which represents an indexed function call
- Added CUDA module and cuda.synchronize()
- Fixed a bug found in the generated header: it did not include the
imports required by the functions it declares

---------

Co-authored-by: EmilyBourne <louise.bourne@gmail.com>
Co-authored-by: bauom <40796259+bauom@users.noreply.github.com>
Co-authored-by: Emily Bourne <emily.bourne@epfl.ch>
---
 .dict_custom.txt                              |   1 +
 CHANGELOG.md                                  |   2 +
 docs/cuda.md                                  |  23 +++
 pyccel/ast/core.py                            |  37 ++++
 pyccel/ast/cuda.py                            |  65 +++++++
 pyccel/ast/cudaext.py                         |  42 +++++
 pyccel/ast/utilities.py                       |   4 +-
 pyccel/codegen/printing/cucode.py             |  46 ++++-
 pyccel/cuda/__init__.py                       |  10 +
 pyccel/cuda/cuda_sync_primitives.py           |  16 ++
 pyccel/decorators.py                          |  32 ++++
 pyccel/errors/messages.py                     |   8 +
 pyccel/parser/semantic.py                     |  84 ++++++++-
 pyccel/parser/syntactic.py                    |   4 +
 tests/conftest.py                             |   9 +
 tests/cuda/test_kernel_semantic.py            | 176 ++++++++++++++++++
 tests/pyccel/scripts/kernel/hello_kernel.py   |  19 ++
 .../scripts/kernel/kernel_name_collision.py   |   8 +
 tests/pyccel/test_pyccel.py                   |  22 ++-
 19 files changed, 599 insertions(+), 9 deletions(-)
 create mode 100644 docs/cuda.md
 create mode 100644 pyccel/ast/cuda.py
 create mode 100644 pyccel/ast/cudaext.py
 create mode 100644 pyccel/cuda/__init__.py
 create mode 100644 pyccel/cuda/cuda_sync_primitives.py
 create mode 100644 tests/cuda/test_kernel_semantic.py
 create mode 100644 tests/pyccel/scripts/kernel/hello_kernel.py
 create mode 100644 tests/pyccel/scripts/kernel/kernel_name_collision.py

diff --git a/.dict_custom.txt b/.dict_custom.txt
index 161337d33b..6ddf80b1ff 100644
--- a/.dict_custom.txt
+++ b/.dict_custom.txt
@@ -120,3 +120,4 @@ indexable
 traceback
 STC
 gFTL
+GPUs
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 72a8f22ded..aacdd31b87 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,8 @@ All notable changes to this project will be documented in this file.
 
 -   #32 : Add support for `nvcc` Compiler and `cuda` language as a possible option.
 -   #48 : Fix incorrect handling of imports in `cuda`.
+-   #42 : Add support for custom kernel in`cuda`.
+-   #42 : Add Cuda module to Pyccel. Add support for `cuda.synchronize` function.
 
 ## \[UNRELEASED\]
 
diff --git a/docs/cuda.md b/docs/cuda.md
new file mode 100644
index 0000000000..de30d52b80
--- /dev/null
+++ b/docs/cuda.md
@@ -0,0 +1,23 @@
+# Getting started with GPU
+
+Pyccel now supports NVIDIA CUDA, empowering users to accelerate numerical computations on GPUs seamlessly. With Pyccel's high-level syntax and automatic code generation, harnessing the power of CUDA becomes effortless. This documentation provides a quick guide to enabling CUDA in Pyccel.
+
+## Cuda Decorator
+
+### kernel
+
+The kernel decorator allows the user to declare a CUDA kernel. The kernel can be defined in Python, and the syntax is similar to that of Numba.
+
+```python
+from pyccel.decorators import kernel
+
+@kernel
+def my_kernel():
+    pass
+
+blockspergrid = 1
+threadsperblock = 1
+# Call your kernel function
+my_kernel[blockspergrid, threadsperblock]()
+
+```
\ No newline at end of file
diff --git a/pyccel/ast/core.py b/pyccel/ast/core.py
index 8981ddc160..2758b75be2 100644
--- a/pyccel/ast/core.py
+++ b/pyccel/ast/core.py
@@ -73,6 +73,7 @@
     'If',
     'IfSection',
     'Import',
+    'IndexedFunctionCall',
     'InProgram',
     'InlineFunctionDef',
     'Interface',
@@ -2065,6 +2066,42 @@ def _ignore(cls, c):
         """
         return c is None or isinstance(c, (FunctionDef, *cls._ignored_types))
 
+class IndexedFunctionCall(FunctionCall):
+    """
+    Represents an indexed function call in the code.
+
+    Class representing indexed function calls, encapsulating all
+    relevant information for such calls within the code base.
+
+    Parameters
+    ----------
+    func : FunctionDef
+        The function being called.
+
+    args : iterable of FunctionCallArgument
+        The arguments passed to the function.
+
+    indexes : iterable of TypedAstNode
+        The indexes of the function call.
+
+    current_function : FunctionDef, optional
+        The function where the call takes place.
+    """
+    __slots__ = ('_indexes',)
+    _attribute_nodes = FunctionCall._attribute_nodes + ('_indexes',)
+    def __init__(self, func, args, indexes, current_function = None):
+        self._indexes = indexes
+        super().__init__(func, args, current_function)
+
+    @property
+    def indexes(self):
+        """
+        Indexes of function call.
+
+        Represents the indexes of the function call
+        """
+        return self._indexes
+
 class ConstructorCall(FunctionCall):
 
     """
diff --git a/pyccel/ast/cuda.py b/pyccel/ast/cuda.py
new file mode 100644
index 0000000000..f1e50ef7f0
--- /dev/null
+++ b/pyccel/ast/cuda.py
@@ -0,0 +1,65 @@
+# -*- coding: utf-8 -*-
+#------------------------------------------------------------------------------------------#
+# This file is part of Pyccel which is released under MIT License. See the LICENSE file or #
+# go to https://github.com/pyccel/pyccel/blob/master/LICENSE for full license details.     #
+#------------------------------------------------------------------------------------------#
+"""
+CUDA Module
+This module provides a collection of classes and utilities for CUDA programming.
+"""
+from pyccel.ast.core import FunctionCall
+
+__all__ = (
+    'KernelCall',
+)
+
+class KernelCall(FunctionCall):
+    """
+    Represents a kernel function call in the code.
+
+    The class serves as a representation of a kernel
+    function call within the codebase.
+
+    Parameters
+    ----------
+    func : FunctionDef
+        The definition of the function being called.
+
+    args : iterable of FunctionCallArgument
+        The arguments passed to the function.
+
+    num_blocks : TypedAstNode
+        The number of blocks. These objects must have a primitive type of `PrimitiveIntegerType`.
+
+    tp_block : TypedAstNode
+        The number of threads per block. These objects must have a primitive type of `PrimitiveIntegerType`.
+
+    current_function : FunctionDef, optional
+        The function where the call takes place.
+    """
+    __slots__ = ('_num_blocks','_tp_block')
+    _attribute_nodes = (*FunctionCall._attribute_nodes, '_num_blocks', '_tp_block')
+
+    def __init__(self, func, args, num_blocks, tp_block, current_function = None):
+        self._num_blocks = num_blocks
+        self._tp_block = tp_block
+        super().__init__(func, args, current_function)
+
+    @property
+    def num_blocks(self):
+        """
+        The number of blocks in the kernel being called.
+
+        The number of blocks in the kernel being called.
+        """
+        return self._num_blocks
+
+    @property
+    def tp_block(self):
+        """
+        The number of threads per block.
+
+        The number of threads per block.
+        """
+        return self._tp_block
+
diff --git a/pyccel/ast/cudaext.py b/pyccel/ast/cudaext.py
new file mode 100644
index 0000000000..b540f20993
--- /dev/null
+++ b/pyccel/ast/cudaext.py
@@ -0,0 +1,42 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+#------------------------------------------------------------------------------------------#
+# This file is part of Pyccel which is released under MIT License. See the LICENSE file or #
+# go to https://github.com/pyccel/pyccel/blob/master/LICENSE for full license details.     #
+#------------------------------------------------------------------------------------------#
+"""
+CUDA Extension Module
+Provides CUDA functionality for code generation.
+"""
+from .internals      import PyccelFunction
+
+from .datatypes      import VoidType
+from .core           import Module, PyccelFunctionDef
+
+__all__ = (
+    'CudaSynchronize',
+)
+
+class CudaSynchronize(PyccelFunction):
+    """
+    Represents a call to Cuda.synchronize for code generation.
+
+    This class serves as a representation of the Cuda.synchronize method.
+    """
+    __slots__ = ()
+    _attribute_nodes = ()
+    _shape     = None
+    _class_type = VoidType()
+    def __init__(self):
+        super().__init__()
+
+cuda_funcs = {
+    'synchronize'       : PyccelFunctionDef('synchronize' , CudaSynchronize),
+}
+
+cuda_mod = Module('cuda',
+    variables=[],
+    funcs=cuda_funcs.values(),
+    imports=[]
+)
+
diff --git a/pyccel/ast/utilities.py b/pyccel/ast/utilities.py
index 1e6c0422ab..e5cd77b168 100644
--- a/pyccel/ast/utilities.py
+++ b/pyccel/ast/utilities.py
@@ -25,6 +25,7 @@
 from .literals      import LiteralInteger, LiteralEllipsis, Nil
 from .mathext       import math_mod
 from .sysext        import sys_mod
+from .cudaext       import cuda_mod
 
 from .numpyext      import (NumpyEmpty, NumpyArray, numpy_mod,
                             NumpyTranspose, NumpyLinspace)
@@ -49,7 +50,8 @@
 decorators_mod = Module('decorators',(),
         funcs = [PyccelFunctionDef(d, PyccelFunction) for d in pyccel_decorators.__all__])
 pyccel_mod = Module('pyccel',(),(),
-        imports = [Import('decorators', decorators_mod)])
+        imports = [Import('decorators', decorators_mod),
+                    Import('cuda', cuda_mod)])
 
 # TODO add documentation
 builtin_import_registry = Module('__main__',
diff --git a/pyccel/codegen/printing/cucode.py b/pyccel/codegen/printing/cucode.py
index 277d2a3a6a..cd26843017 100644
--- a/pyccel/codegen/printing/cucode.py
+++ b/pyccel/codegen/printing/cucode.py
@@ -9,11 +9,12 @@
 enabling the direct translation of high-level Pyccel expressions into CUDA code.
 """
 
-from pyccel.codegen.printing.ccode import CCodePrinter, c_library_headers
+from pyccel.codegen.printing.ccode  import CCodePrinter
 
-from pyccel.ast.core        import Import, Module
+from pyccel.ast.core                import Import, Module
+from pyccel.ast.literals            import Nil
 
-from pyccel.errors.errors   import Errors
+from pyccel.errors.errors           import Errors
 
 
 errors = Errors()
@@ -61,6 +62,44 @@ def _print_Module(self, expr):
         self.exit_scope()
         return code
 
+    def function_signature(self, expr, print_arg_names = True):
+        """
+        Get the Cuda representation of the function signature.
+
+        Extract from the function definition `expr` all the
+        information (name, input, output) needed to create the
+        function signature and return a string describing the
+        function.
+        This is not a declaration as the signature does not end
+        with a semi-colon.
+
+        Parameters
+        ----------
+        expr : FunctionDef
+            The function definition for which a signature is needed.
+
+        print_arg_names : bool, default : True
+            Indicates whether argument names should be printed.
+
+        Returns
+        -------
+        str
+            Signature of the function.
+        """
+        cuda_decorater = '__global__' if 'kernel' in expr.decorators else ''
+        c_function_signature = super().function_signature(expr, print_arg_names)
+        return f'{cuda_decorater} {c_function_signature}'
+
+    def _print_KernelCall(self, expr):
+        func = expr.funcdef
+        args = [a.value or Nil() for a in expr.args]
+
+        args = ', '.join(self._print(a) for a in args)
+        return f"{func.name}<<<{expr.num_blocks}, {expr.tp_block}>>>({args});\n"
+
+    def _print_CudaSynchronize(self, expr):
+        return 'cudaDeviceSynchronize();\n'
+
     def _print_ModuleHeader(self, expr):
         self.set_scope(expr.module.scope)
         self._in_header = True
@@ -87,6 +126,7 @@ def _print_ModuleHeader(self, expr):
                     }}\n'
         return '\n'.join((f"#ifndef {name.upper()}_H",
                           f"#define {name.upper()}_H",
+                          imports,
                           global_variables,
                           function_declaration,
                           "#endif // {name.upper()}_H\n"))
diff --git a/pyccel/cuda/__init__.py b/pyccel/cuda/__init__.py
new file mode 100644
index 0000000000..e8542ad5d5
--- /dev/null
+++ b/pyccel/cuda/__init__.py
@@ -0,0 +1,10 @@
+#------------------------------------------------------------------------------------------#
+# This file is part of Pyccel which is released under MIT License. See the LICENSE file or #
+# go to https://github.com/pyccel/pyccel/blob/master/LICENSE for full license details.     #
+#------------------------------------------------------------------------------------------#
+"""
+    This module is for exposing the CudaSubmodule functions.
+"""
+from .cuda_sync_primitives    import synchronize
+
+__all__ = ['synchronize']
diff --git a/pyccel/cuda/cuda_sync_primitives.py b/pyccel/cuda/cuda_sync_primitives.py
new file mode 100644
index 0000000000..f3442fe9e2
--- /dev/null
+++ b/pyccel/cuda/cuda_sync_primitives.py
@@ -0,0 +1,16 @@
+#------------------------------------------------------------------------------------------#
+# This file is part of Pyccel which is released under MIT License. See the LICENSE file or #
+# go to https://github.com/pyccel/pyccel/blob/master/LICENSE for full license details.     #
+#------------------------------------------------------------------------------------------#
+"""
+This submodule contains CUDA methods for Pyccel.
+"""
+
+
+def synchronize():
+    """
+    Synchronize CUDA device execution.
+
+    Synchronize CUDA device execution.
+    """
+
diff --git a/pyccel/decorators.py b/pyccel/decorators.py
index 1f640043db..77717a991f 100644
--- a/pyccel/decorators.py
+++ b/pyccel/decorators.py
@@ -19,6 +19,7 @@
     'sympy',
     'template',
     'types',
+    'kernel'
 )
 
 
@@ -109,3 +110,34 @@ def allow_negative_index(f,*args):
     def identity(f):
         return f
     return identity
+
+def kernel(f):
+    """
+    Decorator for marking a Python function as a kernel.
+
+    This function serves as a decorator to mark a Python function
+    as a kernel function, typically used for GPU computations.
+    This allows the function to be indexed with the number of blocks and threads.
+
+    Parameters
+    ----------
+    f : function
+        The function to which the decorator is applied.
+
+    Returns
+    -------
+    KernelAccessor
+        A class representing the kernel function.
+    """
+    class KernelAccessor:
+        """
+        Class representing the kernel function.
+
+        Class representing the kernel function.
+        """
+        def __init__(self, f):
+            self._f = f
+        def __getitem__(self, args):
+            return self._f
+
+    return KernelAccessor(f)
diff --git a/pyccel/errors/messages.py b/pyccel/errors/messages.py
index 79eccc1df2..09966d810c 100644
--- a/pyccel/errors/messages.py
+++ b/pyccel/errors/messages.py
@@ -162,3 +162,11 @@
 WRONG_LINSPACE_ENDPOINT = 'endpoint argument must be boolean'
 NON_LITERAL_KEEP_DIMS = 'keep_dims argument must be a literal, otherwise rank is unknown'
 NON_LITERAL_AXIS = 'axis argument must be a literal, otherwise pyccel cannot determine which dimension to operate on'
+MISSING_KERNEL_CONFIGURATION = 'Kernel launch configuration not specified'
+INVALID_KERNEL_LAUNCH_CONFIG = 'Expected exactly 2 parameters for kernel launch'
+INVALID_KERNEL_CALL_BP_GRID = 'Invalid Block per grid parameter for Kernel call'
+INVALID_KERNEL_CALL_TP_BLOCK = 'Invalid Thread per Block parameter for Kernel call'
+
+
+
+
diff --git a/pyccel/parser/semantic.py b/pyccel/parser/semantic.py
index f6e9f34f39..29e851b20c 100644
--- a/pyccel/parser/semantic.py
+++ b/pyccel/parser/semantic.py
@@ -116,6 +116,8 @@
 from pyccel.ast.variable import IndexedElement, AnnotatedPyccelSymbol
 from pyccel.ast.variable import DottedName, DottedVariable
 
+from pyccel.ast.cuda import     KernelCall
+
 from pyccel.errors.errors import Errors
 from pyccel.errors.errors import PyccelSemanticError
 
@@ -133,7 +135,9 @@
         PYCCEL_RESTRICTION_LIST_COMPREHENSION_LIMITS, PYCCEL_RESTRICTION_LIST_COMPREHENSION_SIZE,
         UNUSED_DECORATORS, UNSUPPORTED_POINTER_RETURN_VALUE, PYCCEL_RESTRICTION_OPTIONAL_NONE,
         PYCCEL_RESTRICTION_PRIMITIVE_IMMUTABLE, PYCCEL_RESTRICTION_IS_ISNOT,
-        FOUND_DUPLICATED_IMPORT, UNDEFINED_WITH_ACCESS, MACRO_MISSING_HEADER_OR_FUNC)
+        FOUND_DUPLICATED_IMPORT, UNDEFINED_WITH_ACCESS, MACRO_MISSING_HEADER_OR_FUNC, PYCCEL_RESTRICTION_INHOMOG_SET,
+        MISSING_KERNEL_CONFIGURATION,
+        INVALID_KERNEL_LAUNCH_CONFIG, INVALID_KERNEL_CALL_BP_GRID, INVALID_KERNEL_CALL_TP_BLOCK)
 
 from pyccel.parser.base      import BasicParser
 from pyccel.parser.syntactic import SyntaxParser
@@ -1139,6 +1143,67 @@ def _handle_function(self, expr, func, args, *, is_method = False, use_build_fun
 
             return new_expr
 
+    def _handle_kernel(self, expr, func, args):
+        """
+        Create the node representing the kernel function call.
+
+        Check the launch configuration and create a KernelCall
+        from the function information and arguments.
+
+        Parameters
+        ----------
+        expr : IndexedFunctionCall
+               Node has all the information about the function call.
+
+        func : FunctionDef | Interface | PyccelInternalFunction type
+               The function being called.
+
+        args : iterable of FunctionCallArgument
+               The arguments passed to the function.
+
+        Returns
+        -------
+        Pyccel.ast.cuda.KernelCall
+            The semantic representation of the kernel call.
+        """
+        if len(expr.indexes) != 2:
+            errors.report(INVALID_KERNEL_LAUNCH_CONFIG,
+                    symbol=expr,
+                    severity='fatal')
+        if len(func.results):
+            errors.report(f"cuda kernel function '{func.name}' returned a value in violation of the laid-down specification",
+                         symbol=expr,
+                         severity='fatal')
+        if isinstance(func, FunctionDef) and len(args) != len(func.arguments):
+            errors.report(f"{len(args)} argument types given, but function takes {len(func.arguments)} arguments",
+                symbol=expr,
+                severity='fatal')
+        if not isinstance(expr.indexes[0], (LiteralInteger)):
+            if isinstance(expr.indexes[0], PyccelSymbol):
+                num_blocks = self.get_variable(expr.indexes[0])
+
+                if not isinstance(num_blocks.dtype, PythonNativeInt):
+                    errors.report(INVALID_KERNEL_CALL_BP_GRID,
+                    symbol = expr,
+                    severity='fatal')
+            else:
+                errors.report(INVALID_KERNEL_CALL_BP_GRID,
+                    symbol = expr,
+                    severity='fatal')
+        if not isinstance(expr.indexes[1], (LiteralInteger)):
+            if isinstance(expr.indexes[1], PyccelSymbol):
+                tp_block = self.get_variable(expr.indexes[1])
+                if not isinstance(tp_block.dtype, PythonNativeInt):
+                    errors.report(INVALID_KERNEL_CALL_TP_BLOCK,
+                    symbol = expr,
+                    severity='fatal')
+            else:
+                errors.report(INVALID_KERNEL_CALL_TP_BLOCK,
+                    symbol = expr,
+                    severity='fatal')
+        new_expr = KernelCall(func, args, expr.indexes[0], expr.indexes[1])
+        return new_expr
+
     def _sort_function_call_args(self, func_args, args):
         """
         Sort and add the missing call arguments to match the arguments in the function definition.
@@ -2852,6 +2917,23 @@ def _visit_Lambda(self, expr):
                 expr = Lambda(tuple(expr.variables), expr_new)
         return expr
 
+    def _visit_IndexedFunctionCall(self, expr):
+        name     = expr.funcdef
+        name = self.scope.get_expected_name(name)
+        func     = self.scope.find(name, 'functions')
+        args = self._handle_function_args(expr.args)
+
+        if func is None:
+            return errors.report(UNDEFINED_FUNCTION, symbol=expr.funcdef,
+                    bounding_box=(self.current_ast_node.lineno, self.current_ast_node.col_offset),
+                    severity='fatal')
+
+        func = self._annotate_the_called_function_def(func)
+        if 'kernel' in func.decorators :
+            return self._handle_kernel(expr, func, args)
+        else:
+            return errors.report("Unknown function type",
+                symbol=expr, severity='fatal')
     def _visit_FunctionCall(self, expr):
         name     = expr.funcdef
         try:
diff --git a/pyccel/parser/syntactic.py b/pyccel/parser/syntactic.py
index 318b765703..0cfe895605 100644
--- a/pyccel/parser/syntactic.py
+++ b/pyccel/parser/syntactic.py
@@ -64,6 +64,8 @@
 
 from pyccel.ast.type_annotations import SyntacticTypeAnnotation, UnionTypeAnnotation
 
+from pyccel.ast.core import IndexedFunctionCall
+
 from pyccel.parser.base        import BasicParser
 from pyccel.parser.extend_tree import extend_tree
 from pyccel.parser.utilities   import get_default_path
@@ -1101,6 +1103,8 @@ def _visit_Call(self, stmt):
         elif isinstance(func, DottedName):
             func_attr = FunctionCall(func.name[-1], args)
             func = DottedName(*func.name[:-1], func_attr)
+        elif isinstance(func,IndexedElement):
+            func = IndexedFunctionCall(func.base, args, func.indices)
         else:
             raise NotImplementedError(f' Unknown function type {type(func)}')
 
diff --git a/tests/conftest.py b/tests/conftest.py
index a5082ef6e8..4e74d1ec7a 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -59,6 +59,15 @@ def pytest_runtest_teardown(item, nextitem):
 
 def pytest_addoption(parser):
     parser.addoption("--developer-mode", action="store_true", default=github_debugging, help="Show tracebacks when pyccel errors are raised")
+    parser.addoption("--gpu_available", action="store_true",
+                default=False, help="enable GPU tests")
+
+def pytest_generate_tests(metafunc):
+    if "gpu_available" in metafunc.fixturenames:
+        if metafunc.config.getoption("gpu_available"):
+            metafunc.parametrize("gpu_available", [True])
+        else:
+            metafunc.parametrize("gpu_available", [False])
 
 def pytest_sessionstart(session):
     # setup_stuff
diff --git a/tests/cuda/test_kernel_semantic.py b/tests/cuda/test_kernel_semantic.py
new file mode 100644
index 0000000000..00b74c3bea
--- /dev/null
+++ b/tests/cuda/test_kernel_semantic.py
@@ -0,0 +1,176 @@
+# pylint: disable=missing-function-docstring, missing-module-docstring
+import pytest
+
+from pyccel import epyccel
+from pyccel.decorators import kernel
+from pyccel.errors.errors import Errors, PyccelSemanticError
+from pyccel.errors.messages import (INVALID_KERNEL_CALL_TP_BLOCK,
+                                    INVALID_KERNEL_CALL_BP_GRID,
+                                    INVALID_KERNEL_LAUNCH_CONFIG)
+
+
+@pytest.mark.cuda
+def test_invalid_block_number():
+    def invalid_block_number():
+        @kernel
+        def kernel_call():
+            pass
+
+        blocks_per_grid = 1.0
+        threads_per_block = 1
+        kernel_call[blocks_per_grid, threads_per_block]()
+
+    errors = Errors()
+
+    with pytest.raises(PyccelSemanticError):
+        epyccel(invalid_block_number, language="cuda")
+
+    assert errors.has_errors()
+
+    assert errors.num_messages() == 1
+
+    error_info = [*errors.error_info_map.values()][0][0]
+    assert error_info.symbol.funcdef == 'kernel_call'
+    assert INVALID_KERNEL_CALL_BP_GRID == error_info.message
+
+
+@pytest.mark.cuda
+def test_invalid_thread_per_block():
+    def invalid_thread_per_block():
+        @kernel
+        def kernel_call():
+            pass
+
+        blocks_per_grid = 1
+        threads_per_block = 1.0
+        kernel_call[blocks_per_grid, threads_per_block]()
+
+    errors = Errors()
+
+    with pytest.raises(PyccelSemanticError):
+        epyccel(invalid_thread_per_block, language="cuda")
+    assert errors.has_errors()
+    assert errors.num_messages() == 1
+    error_info = [*errors.error_info_map.values()][0][0]
+    assert error_info.symbol.funcdef == 'kernel_call'
+    assert INVALID_KERNEL_CALL_TP_BLOCK == error_info.message
+
+
+@pytest.mark.cuda
+def test_invalid_launch_config_high():
+    def invalid_launch_config_high():
+        @kernel
+        def kernel_call():
+            pass
+
+        blocks_per_grid = 1
+        threads_per_block = 1
+        third_param = 1
+        kernel_call[blocks_per_grid, threads_per_block, third_param]()
+
+    errors = Errors()
+
+    with pytest.raises(PyccelSemanticError):
+        epyccel(invalid_launch_config_high, language="cuda")
+
+    assert errors.has_errors()
+    assert errors.num_messages() == 1
+
+    error_info = [*errors.error_info_map.values()][0][0]
+    assert error_info.symbol.funcdef == 'kernel_call'
+    assert INVALID_KERNEL_LAUNCH_CONFIG == error_info.message
+
+
+@pytest.mark.cuda
+def test_invalid_launch_config_low():
+    def invalid_launch_config_low():
+        @kernel
+        def kernel_call():
+            pass
+
+        blocks_per_grid = 1
+        kernel_call[blocks_per_grid]()
+
+    errors = Errors()
+
+    with pytest.raises(PyccelSemanticError):
+        epyccel(invalid_launch_config_low, language="cuda")
+
+    assert errors.has_errors()
+    assert errors.num_messages() == 1
+
+    error_info = [*errors.error_info_map.values()][0][0]
+    assert error_info.symbol.funcdef == 'kernel_call'
+    assert INVALID_KERNEL_LAUNCH_CONFIG == error_info.message
+
+
+@pytest.mark.cuda
+def test_invalid_arguments_for_kernel_call():
+    def invalid_arguments():
+        @kernel
+        def kernel_call(arg : int):
+            pass
+
+        blocks_per_grid = 1
+        threads_per_block = 1
+        kernel_call[blocks_per_grid, threads_per_block]()
+
+    errors = Errors()
+
+    with pytest.raises(PyccelSemanticError):
+        epyccel(invalid_arguments, language="cuda")
+
+    assert errors.has_errors()
+    assert errors.num_messages() == 1
+
+    error_info = [*errors.error_info_map.values()][0][0]
+    assert error_info.symbol.funcdef == 'kernel_call'
+    assert "0 argument types given, but function takes 1 arguments" == error_info.message
+
+
+@pytest.mark.cuda
+def test_invalid_arguments_for_kernel_call_2():
+    def invalid_arguments_():
+        @kernel
+        def kernel_call():
+            pass
+
+        blocks_per_grid = 1
+        threads_per_block = 1
+        kernel_call[blocks_per_grid, threads_per_block](1)
+
+    errors = Errors()
+
+    with pytest.raises(PyccelSemanticError):
+        epyccel(invalid_arguments_, language="cuda")
+
+    assert errors.has_errors()
+    assert errors.num_messages() == 1
+
+    error_info = [*errors.error_info_map.values()][0][0]
+    assert error_info.symbol.funcdef == 'kernel_call'
+    assert "1 argument types given, but function takes 0 arguments" == error_info.message
+
+
+@pytest.mark.cuda
+def test_kernel_return():
+    def kernel_return():
+        @kernel
+        def kernel_call():
+            return 7
+
+        blocks_per_grid = 1
+        threads_per_block = 1
+        kernel_call[blocks_per_grid, threads_per_block](1)
+
+    errors = Errors()
+
+    with pytest.raises(PyccelSemanticError):
+        epyccel(kernel_return, language="cuda")
+
+    assert errors.has_errors()
+    assert errors.num_messages() == 1
+
+    error_info = [*errors.error_info_map.values()][0][0]
+    assert error_info.symbol.funcdef == 'kernel_call'
+    assert "cuda kernel function 'kernel_call' returned a value in violation of the laid-down specification" == error_info.message
diff --git a/tests/pyccel/scripts/kernel/hello_kernel.py b/tests/pyccel/scripts/kernel/hello_kernel.py
new file mode 100644
index 0000000000..b6901b25a1
--- /dev/null
+++ b/tests/pyccel/scripts/kernel/hello_kernel.py
@@ -0,0 +1,19 @@
+# pylint: disable=missing-function-docstring, missing-module-docstring
+from pyccel.decorators import kernel
+from pyccel import cuda
+
+@kernel
+def say_hello(its_morning : bool):
+    if(its_morning):
+        print("Hello and Good morning")
+    else:
+        print("Hello and Good afternoon")
+
+def f():
+    its_morning = True
+    say_hello[1,1](its_morning)
+    cuda.synchronize()
+
+if __name__ == '__main__':
+    f()
+
diff --git a/tests/pyccel/scripts/kernel/kernel_name_collision.py b/tests/pyccel/scripts/kernel/kernel_name_collision.py
new file mode 100644
index 0000000000..ac7abe25ae
--- /dev/null
+++ b/tests/pyccel/scripts/kernel/kernel_name_collision.py
@@ -0,0 +1,8 @@
+# pylint: disable=missing-function-docstring, missing-module-docstring
+from pyccel.decorators import kernel
+
+@kernel
+def do():
+    pass
+
+do[1,1]()
diff --git a/tests/pyccel/test_pyccel.py b/tests/pyccel/test_pyccel.py
index ec1e846549..b4757a3c31 100644
--- a/tests/pyccel/test_pyccel.py
+++ b/tests/pyccel/test_pyccel.py
@@ -294,7 +294,7 @@ def compare_pyth_fort_output( p_output, f_output, dtype=float, language=None):
 #------------------------------------------------------------------------------
 def pyccel_test(test_file, dependencies = None, compile_with_pyccel = True,
         cwd = None, pyccel_commands = "", output_dtype = float,
-        language = None, output_dir = None):
+        language = None, output_dir = None, execute_code = True):
     """
     Run pyccel and compare the output to ensure that the results
     are equivalent
@@ -394,13 +394,14 @@ def pyccel_test(test_file, dependencies = None, compile_with_pyccel = True,
             compile_fortran(cwd, output_test_file, dependencies)
         elif language == 'c':
             compile_c(cwd, output_test_file, dependencies)
-
-    lang_out = get_lang_output(output_test_file, language)
-    compare_pyth_fort_output(pyth_out, lang_out, output_dtype, language)
+    if  execute_code:
+        lang_out = get_lang_output(output_test_file, language)
+        compare_pyth_fort_output(pyth_out, lang_out, output_dtype, language)
 
 #==============================================================================
 # UNIT TESTS
 #==============================================================================
+
 def test_relative_imports_in_project(language):
 
     base_dir = os.path.dirname(os.path.realpath(__file__))
@@ -728,6 +729,19 @@ def test_multiple_results(language):
 def test_elemental(language):
     pyccel_test("scripts/decorators_elemental.py", language = language)
 
+#------------------------------------------------------------------------------
+@pytest.mark.cuda
+def test_hello_kernel(gpu_available):
+    types = str
+    pyccel_test("scripts/kernel/hello_kernel.py",
+            language="cuda", output_dtype=types , execute_code=gpu_available)
+
+#------------------------------------------------------------------------------
+@pytest.mark.cuda
+def test_kernel_collision(gpu_available):
+    pyccel_test("scripts/kernel/kernel_name_collision.py",
+            language="cuda", execute_code=gpu_available)
+
 #------------------------------------------------------------------------------
 def test_print_strings(language):
     types = str

From 2ffa7fc9f3b632f149a40c4bc8e7f84dee3cc636 Mon Sep 17 00:00:00 2001
From: Said Mazouz <95222894+smazouz42@users.noreply.github.com>
Date: Wed, 3 Jul 2024 17:37:02 +0100
Subject: [PATCH 102/130] Update CUDA name clash checker by adding
 CUDA-specific keywords (#60)

This pull request addresses issue #59 by adding more CUDA-specific
keywords to the CUDA name clash checker, improving the checking of
variable/function names and preventing name clashes.

---------

Co-authored-by: EmilyBourne <louise.bourne@gmail.com>
Co-authored-by: bauom <40796259+bauom@users.noreply.github.com>
---
 CHANGELOG.md                              |  1 +
 pyccel/naming/cudanameclashchecker.py     | 36 ++++++++++++++++++++++-
 pyccel/naming/languagenameclashchecker.py |  5 ++++
 3 files changed, 41 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index aacdd31b87..ed9ebc8e21 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,7 @@ All notable changes to this project will be documented in this file.
 
 -   #32 : Add support for `nvcc` Compiler and `cuda` language as a possible option.
 -   #48 : Fix incorrect handling of imports in `cuda`.
+-   #59 : Updated `cuda` clash checker.
 -   #42 : Add support for custom kernel in`cuda`.
 -   #42 : Add Cuda module to Pyccel. Add support for `cuda.synchronize` function.
 
diff --git a/pyccel/naming/cudanameclashchecker.py b/pyccel/naming/cudanameclashchecker.py
index 971204e912..c7aaa4952f 100644
--- a/pyccel/naming/cudanameclashchecker.py
+++ b/pyccel/naming/cudanameclashchecker.py
@@ -16,6 +16,7 @@ class CudaNameClashChecker(LanguageNameClashChecker):
     verify that they do not cause name clashes. Name clashes may be due to
     new variables, or due to the use of reserved keywords.
     """
+
     # Keywords as mentioned on https://en.cppreference.com/w/c/keyword
     keywords = set(['isign', 'fsign', 'csign', 'auto', 'break', 'case', 'char', 'const',
         'continue', 'default', 'do', 'double', 'else', 'enum',
@@ -37,7 +38,40 @@ class CudaNameClashChecker(LanguageNameClashChecker):
         'GET_INDEX_FUNC_H2', 'GET_INDEX_FUNC', 'GET_INDEX',
         'INDEX', 'GET_ELEMENT', 'free_array', 'free_pointer',
         'get_index', 'numpy_to_ndarray_strides',
-        'numpy_to_ndarray_shape', 'get_size', 'order_f', 'order_c', 'array_copy_data'])
+        'numpy_to_ndarray_shape', 'get_size', 'order_f', 'order_c', 'array_copy_data'
+        '__global__', '__device__', '__host__','__constant__', '__shared__',
+        '__managed__','threadIdx', 'blockIdx', 'blockDim', 'gridDim',
+        'warpSize', 'cudaMalloc', 'cudaFree', 'cudaMemcpy', 'cudaMemset',
+        'cudaMallocHost', 'cudaFreeHost', 'cudaMallocPitch',
+        'cudaMallocArray', 'cudaFreeArray', 'cudaHostAlloc',
+        'cudaHostRegister', 'cudaHostUnregister', 'cudaHostGetDevicePointer',
+        'cudaHostGetFlags', 'cudaDeviceSynchronize', 'cudaDeviceReset',
+        'cudaSetDevice', 'cudaGetDeviceCount', 'cudaGetDeviceProperties',
+        'cudaChooseDevice', 'cudaSetDeviceFlags', 'cudaGetDevice',
+        'cudaStreamCreate', 'cudaStreamDestroy', 'cudaStreamSynchronize',
+        'cudaStreamWaitEvent', 'cudaEventCreate', 'cudaEventDestroy', 'cudaEventRecord',
+        'cudaEventSynchronize', 'cudaEventElapsedTime', 'cuInit', 'cuDeviceGet',
+        'cuDeviceGetCount', 'cuDeviceGetName',
+        'cuDeviceComputeCapability', 'cuCtxCreate', 'cuCtxDestroy',
+        'cuCtxSynchronize', 'cuModuleLoad', 'cuModuleUnload',
+        'cuModuleGetFunction', 'cuModuleGetGlobal', 'cuModuleGetTexRef',
+        'cuMemAlloc', 'cuMemFree', 'cuMemcpyHtoD', 'cuMemcpyDtoH',
+        'cuMemcpyDtoD', 'cuMemcpyHtoDAsync', 'cuMemcpyDtoHAsync',
+        'cuMemcpyDtoDAsync', 'cuMemsetD8', 'cuMemsetD16', 'cuMemsetD32',
+        'cuMemsetD2D8', 'cuMemsetD2D16', 'cuMemsetD2D32', 'cuParamSetSize',
+        'cuParamSeti', 'cuParamSetf', 'cuParamSetv', 'cuLaunch', 'cuLaunchGrid',
+        'cuLaunchGridAsync', 'cuEventCreate', 'cuEventRecord', 'cuEventQuery',
+        'cuEventSynchronize', 'cuEventDestroy', 'cuEventElapsedTime',
+        'cuStreamCreate', 'cuStreamQuery', 'cuStreamSynchronize',
+        'cuStreamDestroy', 'cuFuncSetBlockShape', 'cuFuncSetSharedSize',
+        'cuFuncGetAttribute', 'cuTexRefCreate', 'cuTexRefDestroy',
+        'cuTexRefSetArray', 'cuTexRefSetAddress', 'cuTexRefSetAddress2D',
+        'cuTexRefSetFormat', 'cuTexRefSetAddressMode', 'cuTexRefSetFilterMode',
+        'cuTexRefSetFlags', 'cuTexRefGetAddress', 'cuTexRefGetArray',
+        'cuTexRefGetAddressMode', 'cuTexRefGetFilterMode', 'cuTexRefGetFormat',
+        'cuTexRefGetFlags', 'cuLaunchKernel', 'cuOccupancyMaxActiveBlocksPerMultiprocessor',
+        'cuOccupancyMaxPotentialBlockSize', 'cuOccupancyMaxPotentialBlockSizeWithFlags'
+    ])
 
     def has_clash(self, name, symbols):
         """
diff --git a/pyccel/naming/languagenameclashchecker.py b/pyccel/naming/languagenameclashchecker.py
index fa672a905b..d6415e6449 100644
--- a/pyccel/naming/languagenameclashchecker.py
+++ b/pyccel/naming/languagenameclashchecker.py
@@ -19,6 +19,11 @@ class LanguageNameClashChecker(metaclass = Singleton):
     """
     keywords = None
 
+    def __init__(self): #pylint: disable=useless-parent-delegation
+        # This __init__ function is required so the ArgumentSingleton can
+        # always detect a signature
+        super().__init__()
+
     def _get_collisionless_name(self, name, symbols):
         """
         Get a name which doesn't collision with keywords or symbols.

From 8eef19d1f0eb02737e9fa3b7d265c80c128880d2 Mon Sep 17 00:00:00 2001
From: Said Mazouz <95222894+smazouz42@users.noreply.github.com>
Date: Wed, 3 Jul 2024 18:04:22 +0100
Subject: [PATCH 103/130] Add handling for custom device functions (#61)

This pull request addresses issue
https://github.com/pyccel/pyccel-cuda/issues/41 by implementing a new
feature in Pyccel that allows users to define custom device functions.

**Commit Summary**

- Add handling for custom device functions and their code generation.
- Add tests.

---------

Co-authored-by: EmilyBourne <louise.bourne@gmail.com>
---
 CHANGELOG.md                               |  1 +
 docs/cuda.md                               | 25 ++++++++++++++++-
 pyccel/codegen/printing/cucode.py          |  7 ++---
 pyccel/decorators.py                       | 19 +++++++++++++
 pyccel/errors/messages.py                  |  2 +-
 pyccel/parser/semantic.py                  |  7 ++++-
 tests/cuda/test_device_semantic.py         | 31 ++++++++++++++++++++++
 tests/pyccel/scripts/kernel/device_test.py | 18 +++++++++++++
 tests/pyccel/test_pyccel.py                |  8 ++++++
 9 files changed, 112 insertions(+), 6 deletions(-)
 create mode 100644 tests/cuda/test_device_semantic.py
 create mode 100644 tests/pyccel/scripts/kernel/device_test.py

diff --git a/CHANGELOG.md b/CHANGELOG.md
index ed9ebc8e21..13434a32b2 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -10,6 +10,7 @@ All notable changes to this project will be documented in this file.
 -   #59 : Updated `cuda` clash checker.
 -   #42 : Add support for custom kernel in`cuda`.
 -   #42 : Add Cuda module to Pyccel. Add support for `cuda.synchronize` function.
+-   #41 : Add support for custom device in`cuda`.
 
 ## \[UNRELEASED\]
 
diff --git a/docs/cuda.md b/docs/cuda.md
index de30d52b80..7643a4ac02 100644
--- a/docs/cuda.md
+++ b/docs/cuda.md
@@ -20,4 +20,27 @@ threadsperblock = 1
 # Call your kernel function
 my_kernel[blockspergrid, threadsperblock]()
 
-```
\ No newline at end of file
+```
+
+### device
+
+Device functions are similar to kernels, but are executed within the context of a kernel. They can be called only from kernels or device functions, and are typically used for operations that are too small to justify launching a separate kernel, or for operations that need to be performed repeatedly within the context of a kernel.
+
+```python
+from pyccel.decorators import device, kernel
+
+@device
+def add(x, y):
+    return x + y
+
+@kernel
+def my_kernel():
+    x = 1
+    y = 2
+    z = add(x, y)
+    print(z)
+
+my_kernel[1, 1]()
+
+```
+
diff --git a/pyccel/codegen/printing/cucode.py b/pyccel/codegen/printing/cucode.py
index cd26843017..7c01d93c47 100644
--- a/pyccel/codegen/printing/cucode.py
+++ b/pyccel/codegen/printing/cucode.py
@@ -86,9 +86,10 @@ def function_signature(self, expr, print_arg_names = True):
         str
             Signature of the function.
         """
-        cuda_decorater = '__global__' if 'kernel' in expr.decorators else ''
+        cuda_decorator = '__global__' if 'kernel' in expr.decorators else \
+        '__device__' if 'device' in expr.decorators else ''
         c_function_signature = super().function_signature(expr, print_arg_names)
-        return f'{cuda_decorater} {c_function_signature}'
+        return f'{cuda_decorator} {c_function_signature}'
 
     def _print_KernelCall(self, expr):
         func = expr.funcdef
@@ -109,7 +110,7 @@ def _print_ModuleHeader(self, expr):
         cuda_headers = ""
         for f in expr.module.funcs:
             if not f.is_inline:
-                if 'kernel' in f.decorators:  # Checking for 'kernel' decorator
+                if 'kernel' in f.decorators or 'device' in f.decorators:
                     cuda_headers += self.function_signature(f) + ';\n'
                 else:
                     funcs += self.function_signature(f) + ';\n'
diff --git a/pyccel/decorators.py b/pyccel/decorators.py
index 77717a991f..ff413fe443 100644
--- a/pyccel/decorators.py
+++ b/pyccel/decorators.py
@@ -11,6 +11,7 @@
 __all__ = (
     'allow_negative_index',
     'bypass',
+    'device',
     'elemental',
     'inline',
     'private',
@@ -141,3 +142,21 @@ def __getitem__(self, args):
             return self._f
 
     return KernelAccessor(f)
+
+def device(f):
+    """
+    Decorator for marking a function as a GPU device function.
+
+    This decorator is used to mark a Python function as a GPU device function.
+
+    Parameters
+    ----------
+    f : Function
+        The function to be marked as a device.
+
+    Returns
+    -------
+    f
+        The function marked as a device.
+    """
+    return f
diff --git a/pyccel/errors/messages.py b/pyccel/errors/messages.py
index 09966d810c..5fe622c29b 100644
--- a/pyccel/errors/messages.py
+++ b/pyccel/errors/messages.py
@@ -166,7 +166,7 @@
 INVALID_KERNEL_LAUNCH_CONFIG = 'Expected exactly 2 parameters for kernel launch'
 INVALID_KERNEL_CALL_BP_GRID = 'Invalid Block per grid parameter for Kernel call'
 INVALID_KERNEL_CALL_TP_BLOCK = 'Invalid Thread per Block parameter for Kernel call'
-
+INVAlID_DEVICE_CALL = 'A function decorated with "device" should be called only from a "kernel" or another "device" function.'
 
 
 
diff --git a/pyccel/parser/semantic.py b/pyccel/parser/semantic.py
index 29e851b20c..6b4143b442 100644
--- a/pyccel/parser/semantic.py
+++ b/pyccel/parser/semantic.py
@@ -136,9 +136,10 @@
         UNUSED_DECORATORS, UNSUPPORTED_POINTER_RETURN_VALUE, PYCCEL_RESTRICTION_OPTIONAL_NONE,
         PYCCEL_RESTRICTION_PRIMITIVE_IMMUTABLE, PYCCEL_RESTRICTION_IS_ISNOT,
         FOUND_DUPLICATED_IMPORT, UNDEFINED_WITH_ACCESS, MACRO_MISSING_HEADER_OR_FUNC, PYCCEL_RESTRICTION_INHOMOG_SET,
-        MISSING_KERNEL_CONFIGURATION,
+        MISSING_KERNEL_CONFIGURATION, INVAlID_DEVICE_CALL,
         INVALID_KERNEL_LAUNCH_CONFIG, INVALID_KERNEL_CALL_BP_GRID, INVALID_KERNEL_CALL_TP_BLOCK)
 
+
 from pyccel.parser.base      import BasicParser
 from pyccel.parser.syntactic import SyntaxParser
 
@@ -1061,6 +1062,10 @@ def _handle_function(self, expr, func, args, *, is_method = False, use_build_fun
         FunctionCall/PyccelFunction
             The semantic representation of the call.
         """
+
+        if isinstance(func, FunctionDef) and 'device' in func.decorators:
+            if 'kernel' not in self.scope.decorators and 'device' not in self.scope.decorators:
+                errors.report(INVAlID_DEVICE_CALL,symbol=expr, severity='fatal')
         if isinstance(func, PyccelFunctionDef):
             if use_build_functions:
                 annotation_method = '_build_' + func.cls_name.__name__
diff --git a/tests/cuda/test_device_semantic.py b/tests/cuda/test_device_semantic.py
new file mode 100644
index 0000000000..5723991961
--- /dev/null
+++ b/tests/cuda/test_device_semantic.py
@@ -0,0 +1,31 @@
+# pylint: disable=missing-function-docstring, missing-module-docstring
+import pytest
+
+from pyccel import epyccel
+from pyccel.decorators import device
+from pyccel.errors.errors import Errors, PyccelSemanticError
+from pyccel.errors.messages import (INVAlID_DEVICE_CALL,)
+
+
+@pytest.mark.cuda
+def test_invalid_device_call():
+    def invalid_device_call():
+        @device
+        def device_call():
+            pass
+        def fake_kernel_call():
+            device_call()
+
+        fake_kernel_call()
+
+    errors = Errors()
+
+    with pytest.raises(PyccelSemanticError):
+        epyccel(invalid_device_call, language="cuda")
+
+    assert errors.has_errors()
+
+    assert errors.num_messages() == 1
+
+    error_info = [*errors.error_info_map.values()][0][0]
+    assert INVAlID_DEVICE_CALL == error_info.message
diff --git a/tests/pyccel/scripts/kernel/device_test.py b/tests/pyccel/scripts/kernel/device_test.py
new file mode 100644
index 0000000000..a4762a6242
--- /dev/null
+++ b/tests/pyccel/scripts/kernel/device_test.py
@@ -0,0 +1,18 @@
+# pylint: disable=missing-function-docstring, missing-module-docstring
+from pyccel.decorators import device, kernel
+from pyccel import cuda
+
+@device
+def device_call():
+    print("Hello from device")
+
+@kernel
+def kernel_call():
+    device_call()
+
+def f():
+    kernel_call[1,1]()
+    cuda.synchronize()
+
+if __name__ == '__main__':
+    f()
diff --git a/tests/pyccel/test_pyccel.py b/tests/pyccel/test_pyccel.py
index b4757a3c31..2d55c6e1cb 100644
--- a/tests/pyccel/test_pyccel.py
+++ b/tests/pyccel/test_pyccel.py
@@ -742,6 +742,14 @@ def test_kernel_collision(gpu_available):
     pyccel_test("scripts/kernel/kernel_name_collision.py",
             language="cuda", execute_code=gpu_available)
 
+#------------------------------------------------------------------------------
+
+@pytest.mark.cuda
+def test_device_call(gpu_available):
+    types = str
+    pyccel_test("scripts/kernel/device_test.py",
+            language="cuda", output_dtype=types, execute_code=gpu_available)
+
 #------------------------------------------------------------------------------
 def test_print_strings(language):
     types = str

From c02b661fa59e1e2a68dfe356a12dd303652047fc Mon Sep 17 00:00:00 2001
From: smazouz42 <smazouz@student.1337.ma>
Date: Fri, 19 Jul 2024 14:42:08 +0100
Subject: [PATCH 104/130] include cuda_ndarrays.cu in package distribution

---
 pyproject.toml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pyproject.toml b/pyproject.toml
index adffdd13dd..6aec79003f 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -59,6 +59,7 @@ include = [
   "pyccel/stdlib/**/*.f90",
   "pyccel/extensions/STC/include",
   "pyccel/extensions/gFTL/include/v2"
+  "pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.cu"
 ]
 exclude = [
     "pyccel/extensions/STC/src",

From bbbf6f8de4c39a30a8a031b1ab765f357c1186e0 Mon Sep 17 00:00:00 2001
From: smazouz42 <smazouz@student.1337.ma>
Date: Fri, 19 Jul 2024 14:51:19 +0100
Subject: [PATCH 105/130] Add missing comma to cuda include list

---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 6aec79003f..29fbba3da1 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -58,7 +58,7 @@ include = [
   "pyccel/stdlib/**/*.c",
   "pyccel/stdlib/**/*.f90",
   "pyccel/extensions/STC/include",
-  "pyccel/extensions/gFTL/include/v2"
+  "pyccel/extensions/gFTL/include/v2",
   "pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.cu"
 ]
 exclude = [

From 88d74d18ae55388819fbae41330e892ef050d270 Mon Sep 17 00:00:00 2001
From: smazouz42 <smazouz@student.1337.ma>
Date: Fri, 19 Jul 2024 15:17:56 +0100
Subject: [PATCH 106/130] cleaning up my PR

---
 pyccel/codegen/compiling/compilers.py |  2 --
 pyccel/codegen/utilities.py           |  2 --
 pyccel/stdlib/ndarrays/ndarrays.c     | 15 ++++++++-------
 3 files changed, 8 insertions(+), 11 deletions(-)

diff --git a/pyccel/codegen/compiling/compilers.py b/pyccel/codegen/compiling/compilers.py
index 9ba44fbb23..0d496b9e8d 100644
--- a/pyccel/codegen/compiling/compilers.py
+++ b/pyccel/codegen/compiling/compilers.py
@@ -329,8 +329,6 @@ def compile_module(self, compile_obj, output_folder, verbose = False):
         verbose : bool
             Indicates whether additional output should be shown.
         """
-        print("Compiling : ", compile_obj.source)
-        print(os.path.exists(compile_obj.source))
         if not compile_obj.has_target_file:
             return
 
diff --git a/pyccel/codegen/utilities.py b/pyccel/codegen/utilities.py
index 8553b89195..61b19d93d1 100644
--- a/pyccel/codegen/utilities.py
+++ b/pyccel/codegen/utilities.py
@@ -112,7 +112,6 @@ def copy_internal_library(lib_folder, pyccel_dirpath, extra_files = None):
     str
         The location that the files were copied to.
     """
-    print("copy_internal_library : ", lib_folder)
     # get lib path (stdlib_path/lib_name or ext_path/lib_name)
     if lib_folder in external_libs:
         lib_path = os.path.join(ext_path, external_libs[lib_folder], lib_folder)
@@ -148,7 +147,6 @@ def copy_internal_library(lib_folder, pyccel_dirpath, extra_files = None):
             dst_files = [os.path.relpath(os.path.join(root, f), lib_dest_path) \
                     for root, dirs, files in os.walk(lib_dest_path) \
                     for f in files if not f.endswith('.lock')]
-            print("Created : ", dst_files)
             # Create any requested extra files
             if extra_files:
                 for filename, contents in extra_files.items():
diff --git a/pyccel/stdlib/ndarrays/ndarrays.c b/pyccel/stdlib/ndarrays/ndarrays.c
index 784b222cbb..1bbad5e29e 100644
--- a/pyccel/stdlib/ndarrays/ndarrays.c
+++ b/pyccel/stdlib/ndarrays/ndarrays.c
@@ -493,7 +493,7 @@ bool is_same_shape(t_ndarray a, t_ndarray b)
     }
     return (true);
 }
-
+#ifndef __NVCC__
 #define COPY_DATA_FROM_(SRC_TYPE) \
     void copy_data_from_##SRC_TYPE(t_ndarray **ds, t_ndarray src, uint32_t offset, bool elem_wise_cp) \
     { \
@@ -588,24 +588,24 @@ bool is_same_shape(t_ndarray a, t_ndarray b)
                 if(elem_wise_cp == false)\
                 { \
                     for(int64_t i = 0; i < src.length; i++) \
-                        dest->nd_cfloat[i + offset] = src.nd_cfloat[i]; \
+                        dest->nd_cfloat[i + offset] = (float complex)src.nd_##SRC_TYPE[i]; \
                 }\
                 else \
                 {\
                     for(int64_t i = 0; i < src.length; i++) \
-                        dest->nd_cfloat[element_index(*dest, i, dest->nd) + offset] = src.nd_cfloat[element_index(src, i, src.nd)]; \
+                        dest->nd_cfloat[element_index(*dest, i, dest->nd) + offset] = (float complex)src.nd_##SRC_TYPE[element_index(src, i, src.nd)]; \
                 }\
                 break; \
             case nd_cdouble: \
                 if(elem_wise_cp == false)\
                 { \
                     for(int64_t i = 0; i < src.length; i++) \
-                        dest->nd_cdouble[i + offset] = src.nd_cdouble[i]; \
+                        dest->nd_cdouble[i + offset] = (double complex)src.nd_##SRC_TYPE[i]; \
                 }\
                 else \
                 {\
                     for(int64_t i = 0; i < src.length; i++) \
-                        dest->nd_cdouble[element_index(*dest, i, dest->nd) + offset] = src.nd_cdouble[element_index(src, i, src.nd)]; \
+                        dest->nd_cdouble[element_index(*dest, i, dest->nd) + offset] = (double complex)src.nd_##SRC_TYPE[element_index(src, i, src.nd)]; \
                 }\
                 break; \
         } \
@@ -618,10 +618,8 @@ COPY_DATA_FROM_(int32)
 COPY_DATA_FROM_(int64)
 COPY_DATA_FROM_(float)
 COPY_DATA_FROM_(double)
-#ifndef __NVCC__
 COPY_DATA_FROM_(cfloat)
 COPY_DATA_FROM_(cdouble)
-#endif
 
 void copy_data(t_ndarray **ds, t_ndarray src, uint32_t offset, bool elem_wise_cp)
 {
@@ -666,6 +664,8 @@ void copy_data(t_ndarray **ds, t_ndarray src, uint32_t offset, bool elem_wise_cp
     }
 }
 
+#=
+
 void array_copy_data(t_ndarray *dest, t_ndarray src, uint32_t offset)
 {
     unsigned char *d = (unsigned char*)dest->raw_data;
@@ -682,6 +682,7 @@ void array_copy_data(t_ndarray *dest, t_ndarray src, uint32_t offset)
         copy_data(&dest, src, offset, true);
     }
 }
+#endif
 
 /*
 ** sum of ndarray

From 270594970309953fc9600ed21a6d83d9cbe7498a Mon Sep 17 00:00:00 2001
From: smazouz42 <smazouz@student.1337.ma>
Date: Fri, 19 Jul 2024 15:26:03 +0100
Subject: [PATCH 107/130] cleaning up my PR

---
 pyccel/ast/class_defs.py          | 2 --
 pyccel/stdlib/ndarrays/ndarrays.c | 1 -
 tests/pyccel/test_pyccel.py       | 5 ++---
 3 files changed, 2 insertions(+), 6 deletions(-)

diff --git a/pyccel/ast/class_defs.py b/pyccel/ast/class_defs.py
index 10dec4694e..4318cd1575 100644
--- a/pyccel/ast/class_defs.py
+++ b/pyccel/ast/class_defs.py
@@ -266,8 +266,6 @@ def get_cls_base(class_type):
         return CudaArrayClass
     elif isinstance(class_type, (NumpyNumericType, NumpyNDArrayType)):
         return NumpyArrayClass
-    
-
     # elif isinstance(class_type, StackArrayType):
     elif isinstance(class_type, TupleType):
         return TupleClass
diff --git a/pyccel/stdlib/ndarrays/ndarrays.c b/pyccel/stdlib/ndarrays/ndarrays.c
index 1bbad5e29e..c6502b93bb 100644
--- a/pyccel/stdlib/ndarrays/ndarrays.c
+++ b/pyccel/stdlib/ndarrays/ndarrays.c
@@ -664,7 +664,6 @@ void copy_data(t_ndarray **ds, t_ndarray src, uint32_t offset, bool elem_wise_cp
     }
 }
 
-#=
 
 void array_copy_data(t_ndarray *dest, t_ndarray src, uint32_t offset)
 {
diff --git a/tests/pyccel/test_pyccel.py b/tests/pyccel/test_pyccel.py
index e37286c401..689d74de3f 100644
--- a/tests/pyccel/test_pyccel.py
+++ b/tests/pyccel/test_pyccel.py
@@ -62,9 +62,8 @@ def compile_pyccel(path_dir, test_file, options = ""):
     cmd = [shutil.which("pyccel"), test_file]
     if options != "":
         cmd += options.strip().split()
-    p = subprocess.run(cmd, capture_output = True, universal_newlines=True, cwd=path_dir)
-    print(p.stdout)
-    print(p.stderr)
+    p = subprocess.Popen(cmd, universal_newlines=True, cwd=path_dir)
+    p.wait()
     assert p.returncode==0
 
 #------------------------------------------------------------------------------

From 77a93e743302b38c94e5973d18ffd1307b8a0a7b Mon Sep 17 00:00:00 2001
From: smazouz42 <smazouz@student.1337.ma>
Date: Fri, 19 Jul 2024 15:53:36 +0100
Subject: [PATCH 108/130] cleaning up my PR

---
 pyccel/codegen/printing/ccode.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/pyccel/codegen/printing/ccode.py b/pyccel/codegen/printing/ccode.py
index 676d3b37a4..3b1e613da3 100644
--- a/pyccel/codegen/printing/ccode.py
+++ b/pyccel/codegen/printing/ccode.py
@@ -1327,10 +1327,13 @@ def get_declare_type(self, expr):
             if isinstance(expr.class_type, (HomogeneousSetType, HomogeneousListType)):
                 dtype = self.get_c_type(expr.class_type)
                 return dtype
-            if isinstance(expr.class_type,(HomogeneousTupleType, NumpyNDArrayType, CudaArrayType)):
+            if isinstance(expr.class_type,(HomogeneousTupleType, NumpyNDArrayType)):
                 if expr.rank > 15:
                     errors.report(UNSUPPORTED_ARRAY_RANK, symbol=expr, severity='fatal')
                 self.add_import(c_imports['ndarrays'])
+                dtype = 't_ndarray'
+            elif isinstance(expr.class_type, CudaArrayType):
+                self.add_import(c_imports['ndarrays'])
                 self.add_import(c_imports['cuda_ndarrays'])
                 dtype = 't_ndarray'
             else:

From ff74a4a403ce0a967739a0ae0823258dd9b36a26 Mon Sep 17 00:00:00 2001
From: smazouz42 <smazouz@student.1337.ma>
Date: Fri, 19 Jul 2024 16:27:46 +0100
Subject: [PATCH 109/130] enable import cucomplex in ndarrays header

---
 pyccel/stdlib/ndarrays/ndarrays.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/pyccel/stdlib/ndarrays/ndarrays.h b/pyccel/stdlib/ndarrays/ndarrays.h
index 2e1b8e793d..c2f1293bef 100644
--- a/pyccel/stdlib/ndarrays/ndarrays.h
+++ b/pyccel/stdlib/ndarrays/ndarrays.h
@@ -10,7 +10,10 @@
 # include <stdbool.h>
 # include <stdint.h>
 # include <math.h>
-#include <cuComplex.h>
+
+# ifdef __NVCC__
+    #include <cuComplex.h>
+# endif
 
 /* mapping the function array_fill to the correct type */
 # define array_fill(c, arr) _Generic((c), int64_t : _array_fill_int64,\

From e138ae50cc2888cdf5d970f556483b40f77ce199 Mon Sep 17 00:00:00 2001
From: smazouz42 <smazouz@student.1337.ma>
Date: Fri, 19 Jul 2024 17:02:30 +0100
Subject: [PATCH 110/130] cleaning up my PR

---
 pyccel/codegen/utilities.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyccel/codegen/utilities.py b/pyccel/codegen/utilities.py
index 61b19d93d1..07be774063 100644
--- a/pyccel/codegen/utilities.py
+++ b/pyccel/codegen/utilities.py
@@ -43,7 +43,7 @@
     "cwrapper"     : ("cwrapper", CompileObj("cwrapper.c",folder="cwrapper", accelerators=('python',))),
     "numpy_f90"    : ("numpy", CompileObj("numpy_f90.f90",folder="numpy")),
     "numpy_c"      : ("numpy", CompileObj("numpy_c.c",folder="numpy")),
-    "STC_Extensions" : ("STC_Extensions", CompileObj("Set_Extensions.h",folder="STC_Extensions", has_target_file = False)),
+    "Set_extensions" : ("STC_Extensions", CompileObj("Set_Extensions.h", folder="STC_Extensions", has_target_file = False)),
 }
 internal_libs["cwrapper_ndarrays"] = ("cwrapper_ndarrays", CompileObj("cwrapper_ndarrays.c",folder="cwrapper_ndarrays",
                                                              accelerators = ('python',),

From 8c55fd9fd3056a32a6ddce5ba05925a048a6bd93 Mon Sep 17 00:00:00 2001
From: smazouz42 <smazouz@student.1337.ma>
Date: Sun, 21 Jul 2024 18:26:55 +0100
Subject: [PATCH 111/130] adding test for cuda array addition

---
 pyccel/codegen/printing/cucode.py              |  5 +++--
 .../scripts/kernel/cuda_host_array_addition.py | 18 ++++++++++++++++++
 tests/pyccel/test_pyccel.py                    |  6 ++++++
 3 files changed, 27 insertions(+), 2 deletions(-)
 create mode 100644 tests/pyccel/scripts/kernel/cuda_host_array_addition.py

diff --git a/pyccel/codegen/printing/cucode.py b/pyccel/codegen/printing/cucode.py
index 830af67dab..d182e67593 100644
--- a/pyccel/codegen/printing/cucode.py
+++ b/pyccel/codegen/printing/cucode.py
@@ -147,7 +147,8 @@ def _print_Allocate(self, expr):
             dtype = self.find_in_ndarray_type_registry(numpy_precision_map[(variable.dtype.primitive_type, variable.dtype.precision)])
         else:
             raise NotImplementedError(f"Don't know how to index {variable.class_type} type")
-        shape_Assign = "int64_t shape_Assign [] = {" + shape + "};\n"
+        shape_Assign = f"int64_t shape_Assign_{expr.variable.name} [] = {{{shape}}};\n"
+        
         is_view = 'false' if variable.on_heap else 'true'
         memory_location = variable.class_type.memory_location
         if memory_location in ('device', 'host'):
@@ -155,7 +156,7 @@ def _print_Allocate(self, expr):
         else:
             memory_location = 'managedMemory'
         self.add_import(c_imports['cuda_ndarrays'])
-        alloc_code = f"{self._print(expr.variable)} = cuda_array_create({variable.rank},  shape_Assign, {dtype}, {is_view},{memory_location});\n"
+        alloc_code = f"{self._print(expr.variable)} = cuda_array_create({variable.rank},  shape_Assign_{expr.variable.name}, {dtype}, {is_view},{memory_location});\n"
         return f'{shape_Assign} {alloc_code}'
 
     def _print_Deallocate(self, expr):
diff --git a/tests/pyccel/scripts/kernel/cuda_host_array_addition.py b/tests/pyccel/scripts/kernel/cuda_host_array_addition.py
new file mode 100644
index 0000000000..3734f45c7c
--- /dev/null
+++ b/tests/pyccel/scripts/kernel/cuda_host_array_addition.py
@@ -0,0 +1,18 @@
+# pylint: disable=missing-function-docstring, missing-module-docstring
+from pyccel import cuda
+
+def addition_cuda_host_arrays():
+    a = cuda.host_empty(3)
+    b = cuda.host_empty(3)
+
+    for i in range(3):
+        b[i] = 1
+        a[i] = 1
+
+    for i in range(3):
+        b[i] += a[i]
+
+    print(b)
+
+if __name__ == '__main__':
+    addition_cuda_host_arrays()
diff --git a/tests/pyccel/test_pyccel.py b/tests/pyccel/test_pyccel.py
index 689d74de3f..695a3f612f 100644
--- a/tests/pyccel/test_pyccel.py
+++ b/tests/pyccel/test_pyccel.py
@@ -750,7 +750,13 @@ def test_host_array(gpu_available):
             language="cuda", output_dtype=types, execute_code=gpu_available)
 
 #------------------------------------------------------------------------------
+@pytest.mark.cuda
+def test_cuda_host_array_addition(gpu_available):
+    types = float
+    pyccel_test("scripts/kernel/cuda_host_array_addition.py",
+            language="cuda", output_dtype=types, execute_code=gpu_available)
 
+#------------------------------------------------------------------------------
 @pytest.mark.cuda
 def test_device_call(gpu_available):
     types = str

From d5b733f4567f15842d95e93d7ac4967523d6c38d Mon Sep 17 00:00:00 2001
From: smazouz42 <smazouz@student.1337.ma>
Date: Sun, 21 Jul 2024 21:55:56 +0100
Subject: [PATCH 112/130] refactoring the code

---
 pyccel/ast/cudaext.py             | 10 ----------
 pyccel/codegen/printing/ccode.py  |  4 ----
 pyccel/codegen/printing/cucode.py |  4 +---
 3 files changed, 1 insertion(+), 17 deletions(-)

diff --git a/pyccel/ast/cudaext.py b/pyccel/ast/cudaext.py
index f890fc7999..a97623ac15 100644
--- a/pyccel/ast/cudaext.py
+++ b/pyccel/ast/cudaext.py
@@ -49,16 +49,6 @@ class CudaNewarray(PyccelFunction):
     __slots__ = ('_class_type', '_init_dtype', '_memory_location')
     name = 'newarray'
 
-    @property
-    def init_dtype(self):
-        """
-        The dtype provided to the function when it was initialised in Python.
-
-        The dtype provided to the function when it was initialised in Python.
-        If no dtype was provided then this should equal `None`.
-        """
-        return self._init_dtype
-
     def __init__(self, *args ,class_type, init_dtype, memory_location):
         self._class_type = class_type
         self._init_dtype = init_dtype
diff --git a/pyccel/codegen/printing/ccode.py b/pyccel/codegen/printing/ccode.py
index 3b1e613da3..4dd87b0514 100644
--- a/pyccel/codegen/printing/ccode.py
+++ b/pyccel/codegen/printing/ccode.py
@@ -1332,10 +1332,6 @@ def get_declare_type(self, expr):
                     errors.report(UNSUPPORTED_ARRAY_RANK, symbol=expr, severity='fatal')
                 self.add_import(c_imports['ndarrays'])
                 dtype = 't_ndarray'
-            elif isinstance(expr.class_type, CudaArrayType):
-                self.add_import(c_imports['ndarrays'])
-                self.add_import(c_imports['cuda_ndarrays'])
-                dtype = 't_ndarray'
             else:
                 errors.report(PYCCEL_RESTRICTION_TODO+' (rank>0)', symbol=expr, severity='fatal')
         elif not isinstance(class_type, CustomDataType):
diff --git a/pyccel/codegen/printing/cucode.py b/pyccel/codegen/printing/cucode.py
index d182e67593..de3f97a4a1 100644
--- a/pyccel/codegen/printing/cucode.py
+++ b/pyccel/codegen/printing/cucode.py
@@ -143,12 +143,10 @@ def _print_Allocate(self, expr):
         shape = ", ".join(self._print(i) for i in expr.shape)
         if isinstance(variable.class_type, CudaArrayType):
             dtype = self.find_in_ndarray_type_registry(variable.dtype)
-        elif isinstance(variable.class_type, HomogeneousContainerType):
-            dtype = self.find_in_ndarray_type_registry(numpy_precision_map[(variable.dtype.primitive_type, variable.dtype.precision)])
         else:
             raise NotImplementedError(f"Don't know how to index {variable.class_type} type")
         shape_Assign = f"int64_t shape_Assign_{expr.variable.name} [] = {{{shape}}};\n"
-        
+
         is_view = 'false' if variable.on_heap else 'true'
         memory_location = variable.class_type.memory_location
         if memory_location in ('device', 'host'):

From 1fcb3a2d22d39f110648711446059011f36afeb1 Mon Sep 17 00:00:00 2001
From: smazouz42 <smazouz@student.1337.ma>
Date: Mon, 22 Jul 2024 11:19:18 +0100
Subject: [PATCH 113/130] adding test for cuda 2d array addition

---
 pyccel/ast/cudaext.py                         |  8 ------
 pyccel/ast/cudatypes.py                       | 25 ++++++++++++++++++
 pyccel/ast/variable.py                        |  2 +-
 pyccel/codegen/printing/ccode.py              |  4 +++
 pyccel/codegen/printing/cucode.py             | 26 -------------------
 .../kernel/cuda_host_2d_array_addition.py     | 19 ++++++++++++++
 tests/pyccel/test_pyccel.py                   |  6 +++++
 7 files changed, 55 insertions(+), 35 deletions(-)
 create mode 100644 tests/pyccel/scripts/kernel/cuda_host_2d_array_addition.py

diff --git a/pyccel/ast/cudaext.py b/pyccel/ast/cudaext.py
index a97623ac15..761454cae0 100644
--- a/pyccel/ast/cudaext.py
+++ b/pyccel/ast/cudaext.py
@@ -123,14 +123,6 @@ class CudaHostEmpty(CudaFull):
     def __init__(self, shape, dtype='float', order='C'):
         memory_location = 'host'
         super().__init__(shape, Nil(), dtype, order , memory_location)
-    @property
-    def fill_value(self):
-        """
-        The value with which the array will be filled on initialisation.
-
-        The value with which the array will be filled on initialisation.
-        """
-        return None
 
 class CudaSynchronize(PyccelFunction):
     """
diff --git a/pyccel/ast/cudatypes.py b/pyccel/ast/cudatypes.py
index 70c1fc06c8..9d2cab79a9 100644
--- a/pyccel/ast/cudatypes.py
+++ b/pyccel/ast/cudatypes.py
@@ -98,7 +98,32 @@ def order(self):
         this function returns None.
         """
         return self._order
+    def switch_rank(self, new_rank, new_order = None):
+        """
+        Get a type which is identical to this type in all aspects except the rank and/or order.
+
+        Get a type which is identical to this type in all aspects except the rank and/or order.
+        The order must be provided if the rank is increased from 1. Otherwise it defaults to the
+        same order as the current type.
+
+        Parameters
+        ----------
+        new_rank : int
+            The rank of the new type.
 
+        new_order : str, optional
+            The order of the new type. This should be provided if the rank is increased from 1.
+
+        Returns
+        -------
+        PyccelType
+            The new type.
+        """
+        if new_rank == 0:
+            return self.element_type
+        else:
+            new_order = (new_order or self._order) if new_rank > 1 else None
+            return CudaArrayType(self.element_type, new_rank, new_order, self.memory_location)
     def __repr__(self):
         dims = ','.join(':'*self._container_rank)
         order_str = f'(order={self._order})' if self._order else ''
diff --git a/pyccel/ast/variable.py b/pyccel/ast/variable.py
index 051cf631b7..e55f54b9ab 100644
--- a/pyccel/ast/variable.py
+++ b/pyccel/ast/variable.py
@@ -798,7 +798,7 @@ class IndexedElement(TypedAstNode):
     _attribute_nodes = ('_label', '_indices', '_shape')
 
     def __init__(self, base, *indices):
-
+        
         if not indices:
             raise IndexError('Indexed needs at least one index.')
 
diff --git a/pyccel/codegen/printing/ccode.py b/pyccel/codegen/printing/ccode.py
index 4dd87b0514..3b1e613da3 100644
--- a/pyccel/codegen/printing/ccode.py
+++ b/pyccel/codegen/printing/ccode.py
@@ -1332,6 +1332,10 @@ def get_declare_type(self, expr):
                     errors.report(UNSUPPORTED_ARRAY_RANK, symbol=expr, severity='fatal')
                 self.add_import(c_imports['ndarrays'])
                 dtype = 't_ndarray'
+            elif isinstance(expr.class_type, CudaArrayType):
+                self.add_import(c_imports['ndarrays'])
+                self.add_import(c_imports['cuda_ndarrays'])
+                dtype = 't_ndarray'
             else:
                 errors.report(PYCCEL_RESTRICTION_TODO+' (rank>0)', symbol=expr, severity='fatal')
         elif not isinstance(class_type, CustomDataType):
diff --git a/pyccel/codegen/printing/cucode.py b/pyccel/codegen/printing/cucode.py
index de3f97a4a1..980a9d52ad 100644
--- a/pyccel/codegen/printing/cucode.py
+++ b/pyccel/codegen/printing/cucode.py
@@ -167,32 +167,6 @@ def _print_Deallocate(self, expr):
             return f"cuda_free_host({var_code});\n"
         else:
             return f"cuda_free({var_code});\n"
-    def get_declare_type(self, expr):
-        """
-        Get the string which describes the type in a declaration.
-
-        This function returns the code which describes the type
-        of the `expr` object such that the declaration can be written as:
-        `f"{self.get_declare_type(expr)} {expr.name}"`
-
-        Parameters
-        ----------
-        expr : Variable
-            The variable whose type should be described.
-
-        Returns
-        -------
-        str
-            The code describing the type.
-        """
-        class_type = expr.class_type
-        rank  = expr.rank
-        if not isinstance(class_type, CudaArrayType ) or rank <= 0:
-            return super().get_declare_type(expr)
-        self.add_import(c_imports['ndarrays'])
-        self.add_import(c_imports['cuda_ndarrays'])
-        dtype = 't_ndarray '
-        return dtype
 
     def _print_Assign(self, expr):
         rhs = expr.rhs
diff --git a/tests/pyccel/scripts/kernel/cuda_host_2d_array_addition.py b/tests/pyccel/scripts/kernel/cuda_host_2d_array_addition.py
new file mode 100644
index 0000000000..05fd29769f
--- /dev/null
+++ b/tests/pyccel/scripts/kernel/cuda_host_2d_array_addition.py
@@ -0,0 +1,19 @@
+from pyccel import cuda
+
+def addition_cuda_host_2Darrays():
+    a = cuda.host_empty((10,10))
+    b = cuda.host_empty((10,10))
+
+    for i in range(10):
+        for j in range(10):
+            a[i][j] = 1
+            b[i][j] = 1
+    b+=a
+    b+=1
+
+    for i in range(10):
+        for j in range(10):
+            print(b[i][j])
+if __name__ == '__main__':
+    addition_cuda_host_2Darrays()
+
diff --git a/tests/pyccel/test_pyccel.py b/tests/pyccel/test_pyccel.py
index 695a3f612f..f8638a3b3c 100644
--- a/tests/pyccel/test_pyccel.py
+++ b/tests/pyccel/test_pyccel.py
@@ -755,6 +755,12 @@ def test_cuda_host_array_addition(gpu_available):
     types = float
     pyccel_test("scripts/kernel/cuda_host_array_addition.py",
             language="cuda", output_dtype=types, execute_code=gpu_available)
+#------------------------------------------------------------------------------
+@pytest.mark.cuda
+def test_cuda_host_2d_array_addition(gpu_available):
+    types = float
+    pyccel_test("scripts/kernel/cuda_host_2d_array_addition.py",
+            language="cuda", output_dtype=types, execute_code=gpu_available)
 
 #------------------------------------------------------------------------------
 @pytest.mark.cuda

From c5c9db1e7e68671f88c88ec718bb1ca5edbb1f4f Mon Sep 17 00:00:00 2001
From: smazouz42 <smazouz@student.1337.ma>
Date: Mon, 22 Jul 2024 11:29:05 +0100
Subject: [PATCH 114/130] cleaning up my PR

---
 pyccel/ast/variable.py                                     | 2 +-
 pyccel/codegen/printing/cucode.py                          | 2 --
 tests/pyccel/scripts/kernel/cuda_host_2d_array_addition.py | 6 +++---
 tests/pyccel/scripts/kernel/cuda_host_array_addition.py    | 4 +---
 4 files changed, 5 insertions(+), 9 deletions(-)

diff --git a/pyccel/ast/variable.py b/pyccel/ast/variable.py
index e55f54b9ab..051cf631b7 100644
--- a/pyccel/ast/variable.py
+++ b/pyccel/ast/variable.py
@@ -798,7 +798,7 @@ class IndexedElement(TypedAstNode):
     _attribute_nodes = ('_label', '_indices', '_shape')
 
     def __init__(self, base, *indices):
-        
+
         if not indices:
             raise IndexError('Indexed needs at least one index.')
 
diff --git a/pyccel/codegen/printing/cucode.py b/pyccel/codegen/printing/cucode.py
index 980a9d52ad..bf43a12b1a 100644
--- a/pyccel/codegen/printing/cucode.py
+++ b/pyccel/codegen/printing/cucode.py
@@ -16,8 +16,6 @@
 
 from pyccel.errors.errors           import Errors
 from pyccel.ast.cudatypes           import CudaArrayType
-from pyccel.ast.datatypes           import HomogeneousContainerType
-from pyccel.ast.numpytypes          import numpy_precision_map
 from pyccel.ast.cudaext             import CudaFull
 from pyccel.codegen.printing.ccode  import c_imports
 
diff --git a/tests/pyccel/scripts/kernel/cuda_host_2d_array_addition.py b/tests/pyccel/scripts/kernel/cuda_host_2d_array_addition.py
index 05fd29769f..80d8785acd 100644
--- a/tests/pyccel/scripts/kernel/cuda_host_2d_array_addition.py
+++ b/tests/pyccel/scripts/kernel/cuda_host_2d_array_addition.py
@@ -1,3 +1,4 @@
+# pylint: disable=missing-function-docstring, missing-module-docstring
 from pyccel import cuda
 
 def addition_cuda_host_2Darrays():
@@ -11,9 +12,8 @@ def addition_cuda_host_2Darrays():
     b+=a
     b+=1
 
-    for i in range(10):
-        for j in range(10):
-            print(b[i][j])
+    print(b)
+
 if __name__ == '__main__':
     addition_cuda_host_2Darrays()
 
diff --git a/tests/pyccel/scripts/kernel/cuda_host_array_addition.py b/tests/pyccel/scripts/kernel/cuda_host_array_addition.py
index 3734f45c7c..3ad186fd4f 100644
--- a/tests/pyccel/scripts/kernel/cuda_host_array_addition.py
+++ b/tests/pyccel/scripts/kernel/cuda_host_array_addition.py
@@ -9,9 +9,7 @@ def addition_cuda_host_arrays():
         b[i] = 1
         a[i] = 1
 
-    for i in range(3):
-        b[i] += a[i]
-
+    b += a
     print(b)
 
 if __name__ == '__main__':

From bbd46e12c8b0597eb3b5ac12ff731aa4b08f47c7 Mon Sep 17 00:00:00 2001
From: smazouz42 <smazouz@student.1337.ma>
Date: Mon, 22 Jul 2024 11:51:47 +0100
Subject: [PATCH 115/130] cleaning up my PR

---
 pyccel/ast/cudatypes.py          | 2 ++
 pyccel/codegen/printing/ccode.py | 1 -
 pyccel/parser/semantic.py        | 5 ++---
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/pyccel/ast/cudatypes.py b/pyccel/ast/cudatypes.py
index 9d2cab79a9..671a9e031e 100644
--- a/pyccel/ast/cudatypes.py
+++ b/pyccel/ast/cudatypes.py
@@ -16,6 +16,7 @@
 
 from .numpytypes    import NumpyNDArrayType
 
+__all__ = ('CudaArrayType',)
 
 class CudaArrayType(HomogeneousContainerType, metaclass = ArgumentSingleton):
     """
@@ -36,6 +37,7 @@ class CudaArrayType(HomogeneousContainerType, metaclass = ArgumentSingleton):
         The memory location of the new cuda array ('host' or 'device').
     """
     __slots__ = ('_element_type', '_container_rank', '_order', '_memory_location')
+    
 
     def __init__(self, dtype, rank, order, memory_location):
         assert isinstance(rank, int)
diff --git a/pyccel/codegen/printing/ccode.py b/pyccel/codegen/printing/ccode.py
index 3b1e613da3..1ffc85cbd2 100644
--- a/pyccel/codegen/printing/ccode.py
+++ b/pyccel/codegen/printing/ccode.py
@@ -46,7 +46,6 @@
 from pyccel.ast.numpytypes import NumpyFloat32Type, NumpyFloat64Type, NumpyComplex64Type, NumpyComplex128Type
 from pyccel.ast.numpytypes import NumpyNDArrayType, numpy_precision_map
 from pyccel.ast.cudatypes  import CudaArrayType
-from pyccel.ast.cudaext    import CudaFull
 
 from pyccel.ast.type_annotations import VariableTypeAnnotation
 
diff --git a/pyccel/parser/semantic.py b/pyccel/parser/semantic.py
index ee95eb041b..6b4143b442 100644
--- a/pyccel/parser/semantic.py
+++ b/pyccel/parser/semantic.py
@@ -3169,7 +3169,7 @@ def _visit_Assign(self, expr):
 
         elif isinstance(rhs, CodeBlock) and len(rhs.body)>1 and isinstance(rhs.body[1], FunctionalFor):
             return rhs
-        
+
         elif isinstance(rhs, FunctionCall):
             func = rhs.funcdef
             results = func.results
@@ -3202,7 +3202,7 @@ def _visit_Assign(self, expr):
                     d_var['memory_handling'] = arg.memory_handling
                     d_var['class_type'     ] = arg.class_type
                     d_var['cls_base'       ] = arg.cls_base
-        
+
         elif isinstance(rhs, NumpyTranspose):
             d_var  = self._infer_type(rhs)
             if d_var['memory_handling'] == 'alias' and not isinstance(lhs, IndexedElement):
@@ -3211,7 +3211,6 @@ def _visit_Assign(self, expr):
             if expr.lhs.is_temp:
                 return rhs
             else:
-                
                 raise NotImplementedError("Cannot assign result of a function without a return")
 
         else:

From 89cd3aab2e13fbffd1eb233c2b247b6df72b45fc Mon Sep 17 00:00:00 2001
From: smazouz42 <smazouz@student.1337.ma>
Date: Mon, 22 Jul 2024 11:55:31 +0100
Subject: [PATCH 116/130] cleaning up my PR

---
 pyccel/ast/cudatypes.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyccel/ast/cudatypes.py b/pyccel/ast/cudatypes.py
index 671a9e031e..d3687629ab 100644
--- a/pyccel/ast/cudatypes.py
+++ b/pyccel/ast/cudatypes.py
@@ -37,7 +37,7 @@ class CudaArrayType(HomogeneousContainerType, metaclass = ArgumentSingleton):
         The memory location of the new cuda array ('host' or 'device').
     """
     __slots__ = ('_element_type', '_container_rank', '_order', '_memory_location')
-    
+
 
     def __init__(self, dtype, rank, order, memory_location):
         assert isinstance(rank, int)

From 5c49cd383fb764a66cfc79c6b11cc9d1f48903c8 Mon Sep 17 00:00:00 2001
From: smazouz42 <smazouz@student.1337.ma>
Date: Mon, 22 Jul 2024 14:56:07 +0100
Subject: [PATCH 117/130] cleaning up my PR

---
 pyccel/ast/cudatypes.py                                    | 7 ++-----
 tests/pyccel/scripts/kernel/cuda_host_2d_array_addition.py | 4 ++--
 tests/pyccel/scripts/kernel/cuda_host_array_addition.py    | 2 +-
 3 files changed, 5 insertions(+), 8 deletions(-)

diff --git a/pyccel/ast/cudatypes.py b/pyccel/ast/cudatypes.py
index d3687629ab..d48a3de7f3 100644
--- a/pyccel/ast/cudatypes.py
+++ b/pyccel/ast/cudatypes.py
@@ -121,11 +121,8 @@ def switch_rank(self, new_rank, new_order = None):
         PyccelType
             The new type.
         """
-        if new_rank == 0:
-            return self.element_type
-        else:
-            new_order = (new_order or self._order) if new_rank > 1 else None
-            return CudaArrayType(self.element_type, new_rank, new_order, self.memory_location)
+        new_order = (new_order or self._order) if new_rank > 1 else None
+        return CudaArrayType(self.element_type, new_rank, new_order, self.memory_location)
     def __repr__(self):
         dims = ','.join(':'*self._container_rank)
         order_str = f'(order={self._order})' if self._order else ''
diff --git a/tests/pyccel/scripts/kernel/cuda_host_2d_array_addition.py b/tests/pyccel/scripts/kernel/cuda_host_2d_array_addition.py
index 80d8785acd..308970e9d0 100644
--- a/tests/pyccel/scripts/kernel/cuda_host_2d_array_addition.py
+++ b/tests/pyccel/scripts/kernel/cuda_host_2d_array_addition.py
@@ -9,8 +9,8 @@ def addition_cuda_host_2Darrays():
         for j in range(10):
             a[i][j] = 1
             b[i][j] = 1
-    b+=a
-    b+=1
+    b = b + a
+    b = b + 1
 
     print(b)
 
diff --git a/tests/pyccel/scripts/kernel/cuda_host_array_addition.py b/tests/pyccel/scripts/kernel/cuda_host_array_addition.py
index 3ad186fd4f..d0f61881cf 100644
--- a/tests/pyccel/scripts/kernel/cuda_host_array_addition.py
+++ b/tests/pyccel/scripts/kernel/cuda_host_array_addition.py
@@ -9,7 +9,7 @@ def addition_cuda_host_arrays():
         b[i] = 1
         a[i] = 1
 
-    b += a
+    b = b + a
     print(b)
 
 if __name__ == '__main__':

From 4ff9ed2f09e905d46073c8c649130a01cb6144bd Mon Sep 17 00:00:00 2001
From: smazouz42 <smazouz@student.1337.ma>
Date: Mon, 22 Jul 2024 15:52:08 +0100
Subject: [PATCH 118/130] Update Changelog

---
 CHANGELOG.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 13434a32b2..a3f9f8a996 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -11,6 +11,7 @@ All notable changes to this project will be documented in this file.
 -   #42 : Add support for custom kernel in`cuda`.
 -   #42 : Add Cuda module to Pyccel. Add support for `cuda.synchronize` function.
 -   #41 : Add support for custom device in`cuda`.
+-   #64 : Add support for `cuda.device_empty` function.
 
 ## \[UNRELEASED\]
 

From 843429b00d8d7813679b6747c46c769bc4f98c39 Mon Sep 17 00:00:00 2001
From: smazouz42 <smazouz@student.1337.ma>
Date: Mon, 22 Jul 2024 16:27:29 +0100
Subject: [PATCH 119/130] refactoring the code

---
 pyccel/ast/numpyext.py                 | 3 ++-
 pyccel/codegen/compiling/compilers.py  | 1 +
 pyccel/codegen/pipeline.py             | 1 +
 pyccel/codegen/printing/codeprinter.py | 1 +
 pyccel/errors/errors.py                | 1 +
 pyccel/naming/cudanameclashchecker.py  | 1 +
 6 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/pyccel/ast/numpyext.py b/pyccel/ast/numpyext.py
index eb1ee92e26..861b640282 100644
--- a/pyccel/ast/numpyext.py
+++ b/pyccel/ast/numpyext.py
@@ -626,6 +626,7 @@ def __init__(self, *args, class_type, init_dtype = None):
         assert isinstance(class_type, NumpyNDArrayType)
         self._init_dtype = init_dtype
         self._class_type = class_type # pylint: disable=no-member
+
         super().__init__(*args)
 
     @property
@@ -1312,12 +1313,12 @@ def __init__(self, shape, fill_value, dtype=None, order='C'):
 
         # Convert shape to PythonTuple
         shape = process_shape(False, shape)
+
         init_dtype = dtype
         # If there is no dtype, extract it from fill_value
         # TODO: must get dtype from an annotated node
         if dtype is None:
             dtype = fill_value.dtype
-
         dtype = process_dtype(dtype)
 
         # Cast fill_value to correct type
diff --git a/pyccel/codegen/compiling/compilers.py b/pyccel/codegen/compiling/compilers.py
index 0d496b9e8d..d909a5036e 100644
--- a/pyccel/codegen/compiling/compilers.py
+++ b/pyccel/codegen/compiling/compilers.py
@@ -493,6 +493,7 @@ def run_command(cmd, verbose):
         cmd = [os.path.expandvars(c) for c in cmd]
         if verbose:
             print(' '.join(cmd))
+
         with subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                 universal_newlines=True) as p:
             out, err = p.communicate()
diff --git a/pyccel/codegen/pipeline.py b/pyccel/codegen/pipeline.py
index ff8d657704..eb357fab74 100644
--- a/pyccel/codegen/pipeline.py
+++ b/pyccel/codegen/pipeline.py
@@ -404,6 +404,7 @@ def get_module_dependencies(parser, deps):
                     verbose=verbose)
 
         timers["Compilation without wrapper"] = time.time() - start_compile_target_language
+
         # Create shared library
         generated_filepath, shared_lib_timers = create_shared_library(codegen,
                                                mod_obj,
diff --git a/pyccel/codegen/printing/codeprinter.py b/pyccel/codegen/printing/codeprinter.py
index 687dd2e378..277aa574dd 100644
--- a/pyccel/codegen/printing/codeprinter.py
+++ b/pyccel/codegen/printing/codeprinter.py
@@ -50,6 +50,7 @@ def doprint(self, expr):
 
         # Do the actual printing
         lines = self._print(expr).splitlines(True)
+
         # Format the output
         return ''.join(self._format_code(lines))
 
diff --git a/pyccel/errors/errors.py b/pyccel/errors/errors.py
index 96910b3dfa..b261a81830 100644
--- a/pyccel/errors/errors.py
+++ b/pyccel/errors/errors.py
@@ -345,6 +345,7 @@ def report(self,
                 traceback = ''.join(tb.format_stack(limit=5))
         else:
             traceback = None
+
         info = ErrorInfo(stage=pyccel_stage.current_stage,
                          filename=filename,
                          message=message,
diff --git a/pyccel/naming/cudanameclashchecker.py b/pyccel/naming/cudanameclashchecker.py
index bd8aa0d2b6..c7aaa4952f 100644
--- a/pyccel/naming/cudanameclashchecker.py
+++ b/pyccel/naming/cudanameclashchecker.py
@@ -16,6 +16,7 @@ class CudaNameClashChecker(LanguageNameClashChecker):
     verify that they do not cause name clashes. Name clashes may be due to
     new variables, or due to the use of reserved keywords.
     """
+
     # Keywords as mentioned on https://en.cppreference.com/w/c/keyword
     keywords = set(['isign', 'fsign', 'csign', 'auto', 'break', 'case', 'char', 'const',
         'continue', 'default', 'do', 'double', 'else', 'enum',

From 0e9292e82b815d95c17d20533e8b5b98f27203a5 Mon Sep 17 00:00:00 2001
From: smazouz42 <smazouz@student.1337.ma>
Date: Wed, 24 Jul 2024 16:17:52 +0100
Subject: [PATCH 120/130] Change doc of cuda.host_empty and fix some errors

---
 pyccel/ast/cudaext.py                        | 17 ++++++++++-------
 pyccel/ast/cudatypes.py                      | 12 +++++++-----
 pyccel/codegen/printing/ccode.py             |  8 +++++---
 pyccel/codegen/printing/cucode.py            |  9 ++++++---
 pyccel/cuda/cuda_arrays.py                   |  4 ++--
 pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.cu | 14 +++++++++-----
 pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.h  |  4 ++--
 7 files changed, 41 insertions(+), 27 deletions(-)

diff --git a/pyccel/ast/cudaext.py b/pyccel/ast/cudaext.py
index 761454cae0..21a18c1ba5 100644
--- a/pyccel/ast/cudaext.py
+++ b/pyccel/ast/cudaext.py
@@ -61,7 +61,7 @@ class CudaFull(CudaNewarray):
     Represents a call to `cuda.full` for code generation.
 
     Represents a call to the Cuda function `full` which creates an array
-    of a specified size and shape filled with a specified value.
+    filled with a specified value.
 
     Parameters
     ----------
@@ -109,14 +109,17 @@ class CudaHostEmpty(CudaFull):
 
     Parameters
     ----------
-    shape : tuple of int , int
-        The shape of the new array.
+    shape : TypedAstNode
+        Shape of the new array, e.g., ``(2, 3)`` or ``2``.
+        For a 1D array this is either a `LiteralInteger` or an expression.
+        For a cuda ND array this is a `TypedAstNode` with the class type HomogeneousTupleType.
 
-    dtype : PythonType, LiteralString, str
-        The actual dtype passed to the NumPy function.
+    dtype : PythonType, PyccelFunctionDef, LiteralString, str, optional
+        Datatype for the constructed array.
 
-    order : str , LiteralString
-        The order passed to the function defoulting to 'C'.
+    order : {'C', 'F'}, optional
+        Whether to store multidimensional data in C- or Fortran-contiguous
+        (row- or column-wise) order in memory.
     """
     __slots__ = ()
     name = 'empty'
diff --git a/pyccel/ast/cudatypes.py b/pyccel/ast/cudatypes.py
index d48a3de7f3..1edbdb0ccc 100644
--- a/pyccel/ast/cudatypes.py
+++ b/pyccel/ast/cudatypes.py
@@ -4,7 +4,7 @@
 # This file is part of Pyccel which is released under MIT License. See the LICENSE file or #
 # go to https://github.com/pyccel/pyccel/blob/devel/LICENSE for full license details.      #
 #------------------------------------------------------------------------------------------#
-""" Module containing types from the numpy module understood by pyccel
+""" Module containing types from the cuda module understood by pyccel
 """
 from functools import lru_cache
 import numpy as np
@@ -42,6 +42,7 @@ class CudaArrayType(HomogeneousContainerType, metaclass = ArgumentSingleton):
     def __init__(self, dtype, rank, order, memory_location):
         assert isinstance(rank, int)
         assert order in (None, 'C', 'F')
+        assert memory_location in ('host', 'device')
 
         self._element_type = dtype
         self._container_rank = rank
@@ -60,6 +61,9 @@ def memory_location(self):
 
     @lru_cache
     def __add__(self, other):
+        if(isinstance(other, CudaArrayType)):
+            assert self.memory_location == other.memory_location
+
         test_type = np.zeros(1, dtype = pyccel_type_to_original_type[self.element_type])
         if isinstance(other, FixedSizeNumericType):
             comparison_type = pyccel_type_to_original_type[other]()
@@ -67,16 +71,14 @@ def __add__(self, other):
             comparison_type = np.zeros(1, dtype = pyccel_type_to_original_type[other.element_type])
         else:
             return NotImplemented
-        if(isinstance(other, CudaArrayType)):
-            assert self.memory_location == other.memory_location
 
         result_type = original_type_to_pyccel_type[np.result_type(test_type, comparison_type).type]
         rank = max(other.rank, self.rank)
         if rank < 2:
             order = None
         else:
-            other_f_contiguous = other.order in (None, 'F')
-            self_f_contiguous = self.order in (None, 'F')
+            other_f_contiguous = other.order == 'F'
+            self_f_contiguous = self.order == 'F'
             order = 'F' if other_f_contiguous and self_f_contiguous else 'C'
         return CudaArrayType(result_type, rank, order, self.memory_location)
 
diff --git a/pyccel/codegen/printing/ccode.py b/pyccel/codegen/printing/ccode.py
index 1ffc85cbd2..cdcc4b48a6 100644
--- a/pyccel/codegen/printing/ccode.py
+++ b/pyccel/codegen/printing/ccode.py
@@ -62,6 +62,7 @@
 from pyccel.codegen.printing.codeprinter import CodePrinter
 
 
+
 from pyccel.errors.errors   import Errors
 from pyccel.errors.messages import (PYCCEL_RESTRICTION_TODO, INCOMPATIBLE_TYPEVAR_TO_FUNC,
                                     PYCCEL_RESTRICTION_IS_ISNOT, UNSUPPORTED_ARRAY_RANK)
@@ -230,8 +231,7 @@
 import_dict = {'omp_lib' : 'omp' }
 
 c_imports = {n : Import(n, Module(n, (), ())) for n in
-                ['cuda_ndarrays',
-                 'stdlib',
+                ['stdlib',
                  'math',
                  'string',
                  'ndarrays',
@@ -244,6 +244,7 @@
                  'assert',
                  'numpy_c']}
 
+
 import_header_guard_prefix = {'Set_extensions' : '_TOOLS_SET'}
 
 class CCodePrinter(CodePrinter):
@@ -1319,6 +1320,7 @@ def get_declare_type(self, expr):
         >>> self.get_declare_type(v)
         't_ndarray*'
         """
+        from pyccel.codegen.printing.cucode import cu_imports
         class_type = expr.class_type
         rank  = expr.rank
 
@@ -1333,7 +1335,7 @@ def get_declare_type(self, expr):
                 dtype = 't_ndarray'
             elif isinstance(expr.class_type, CudaArrayType):
                 self.add_import(c_imports['ndarrays'])
-                self.add_import(c_imports['cuda_ndarrays'])
+                self.add_import(cu_imports['cuda_ndarrays'])
                 dtype = 't_ndarray'
             else:
                 errors.report(PYCCEL_RESTRICTION_TODO+' (rank>0)', symbol=expr, severity='fatal')
diff --git a/pyccel/codegen/printing/cucode.py b/pyccel/codegen/printing/cucode.py
index bf43a12b1a..171ecc97fa 100644
--- a/pyccel/codegen/printing/cucode.py
+++ b/pyccel/codegen/printing/cucode.py
@@ -17,13 +17,16 @@
 from pyccel.errors.errors           import Errors
 from pyccel.ast.cudatypes           import CudaArrayType
 from pyccel.ast.cudaext             import CudaFull
-from pyccel.codegen.printing.ccode  import c_imports
 
 
 errors = Errors()
 
 __all__ = ["CudaCodePrinter"]
 
+cu_imports = {n : Import(n, Module(n, (), ())) for n in
+                ['cuda_ndarrays',]
+                }
+
 class CudaCodePrinter(CCodePrinter):
     """
     Print code in CUDA format.
@@ -148,10 +151,10 @@ def _print_Allocate(self, expr):
         is_view = 'false' if variable.on_heap else 'true'
         memory_location = variable.class_type.memory_location
         if memory_location in ('device', 'host'):
-            memory_location = 'allocateMemoryOn' + str(memory_location).capitalize()
+            memory_location = str(memory_location).capitalize() + 'Memory'
         else:
             memory_location = 'managedMemory'
-        self.add_import(c_imports['cuda_ndarrays'])
+        self.add_import(cu_imports['cuda_ndarrays'])
         alloc_code = f"{self._print(expr.variable)} = cuda_array_create({variable.rank},  shape_Assign_{expr.variable.name}, {dtype}, {is_view},{memory_location});\n"
         return f'{shape_Assign} {alloc_code}'
 
diff --git a/pyccel/cuda/cuda_arrays.py b/pyccel/cuda/cuda_arrays.py
index cbdf938c0c..ff37139b3f 100644
--- a/pyccel/cuda/cuda_arrays.py
+++ b/pyccel/cuda/cuda_arrays.py
@@ -6,7 +6,7 @@
 This submodule contains cuda_arrays methods for Pyccel.
 """
 
-def host_empty(shape):
+def host_empty(shape, dtype = 'float', order = 'C'):
     """
     Create an empty array on the host.
 
@@ -23,7 +23,7 @@ def host_empty(shape):
         The empty array on the host.
     """
     import numpy as np
-    a = np.empty(shape)
+    a = np.empty(shape, dtype = dtype, order = order)
     return a
 
 
diff --git a/pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.cu b/pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.cu
index 47b0e5d420..348cb146b3 100644
--- a/pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.cu
+++ b/pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.cu
@@ -1,19 +1,19 @@
 #include "cuda_ndarrays.h"
 
-void    device_memory(void** devPtr, size_t size)
+void    allocateMemoryOnDevice(void** devPtr, size_t size)
 {
     cudaMalloc(devPtr, size);
 }
 
-void    host_memory(void** devPtr, size_t size)
+void    allocateMemoryOnHost(void** devPtr, size_t size)
 {
-    cudaMallocHost(devPtr, size);
+    *devPtr = malloc(size);
 }
 t_ndarray   cuda_array_create(int32_t nd, int64_t *shape, enum e_types type, bool is_view ,
 enum e_memory_locations location)
 {
     t_ndarray  arr;
-    void (*fun_ptr_arr[])(void**, size_t) = {host_memory, device_memory};
+    void (*fun_ptr_arr[])(void**, size_t) = {allocateMemoryOnHost, allocateMemoryOnDevice};
 
     arr.nd = nd;
     arr.type = type;
@@ -66,10 +66,12 @@ int32_t cuda_free_host(t_ndarray  arr)
 {
     if (arr.shape == NULL)
         return (0);
-    cudaFreeHost(arr.raw_data);
+    free(arr.raw_data);
     arr.raw_data = NULL;
     cudaFree(arr.shape);
     arr.shape = NULL;
+    cudaFree(arr.strides);
+    arr.strides = NULL;
     return (1);
 }
 
@@ -82,5 +84,7 @@ int32_t cuda_free(t_ndarray  arr)
     arr.raw_data = NULL;
     cudaFree(arr.shape);
     arr.shape = NULL;
+    cudaFree(arr.strides);
+    arr.strides = NULL;
     return (0);
 }
\ No newline at end of file
diff --git a/pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.h b/pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.h
index e7cbb4581f..9a29be594d 100644
--- a/pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.h
+++ b/pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.h
@@ -8,8 +8,8 @@
 
 enum e_memory_locations
 {
-        allocateMemoryOnHost,
-        allocateMemoryOnDevice
+        HostMemory,
+        DeviceMemory
 };
 
 

From 8a4106fa0d36bfb8e758a703d278e1232c7600f7 Mon Sep 17 00:00:00 2001
From: smazouz42 <smazouz@student.1337.ma>
Date: Wed, 24 Jul 2024 16:34:21 +0100
Subject: [PATCH 121/130] Split a long line to make it readable

---
 pyccel/codegen/utilities.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/pyccel/codegen/utilities.py b/pyccel/codegen/utilities.py
index 07be774063..e535e284d4 100644
--- a/pyccel/codegen/utilities.py
+++ b/pyccel/codegen/utilities.py
@@ -40,10 +40,12 @@
     "cuda_ndarrays": ("cuda_ndarrays", CompileObj("cuda_ndarrays.cu",folder="cuda_ndarrays")),
     "pyc_math_f90" : ("math", CompileObj("pyc_math_f90.f90",folder="math")),
     "pyc_math_c"   : ("math", CompileObj("pyc_math_c.c",folder="math")),
-    "cwrapper"     : ("cwrapper", CompileObj("cwrapper.c",folder="cwrapper", accelerators=('python',))),
+    "cwrapper"     : ("cwrapper", CompileObj("cwrapper.c",folder="cwrapper",
+                                             accelerators=('python',))),
     "numpy_f90"    : ("numpy", CompileObj("numpy_f90.f90",folder="numpy")),
     "numpy_c"      : ("numpy", CompileObj("numpy_c.c",folder="numpy")),
-    "Set_extensions" : ("STC_Extensions", CompileObj("Set_Extensions.h", folder="STC_Extensions", has_target_file = False)),
+    "Set_extensions" : ("STC_Extensions", CompileObj("Set_Extensions.h",
+                                             folder="STC_Extensions", has_target_file = False)),
 }
 internal_libs["cwrapper_ndarrays"] = ("cwrapper_ndarrays", CompileObj("cwrapper_ndarrays.c",folder="cwrapper_ndarrays",
                                                              accelerators = ('python',),

From eea028adc743f5c0b3792e23a364b868dad20504 Mon Sep 17 00:00:00 2001
From: smazouz42 <smazouz@student.1337.ma>
Date: Thu, 25 Jul 2024 10:56:13 +0100
Subject: [PATCH 122/130] fix doc string of host_empty

---
 pyccel/cuda/cuda_arrays.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/pyccel/cuda/cuda_arrays.py b/pyccel/cuda/cuda_arrays.py
index ff37139b3f..5aa34bbf38 100644
--- a/pyccel/cuda/cuda_arrays.py
+++ b/pyccel/cuda/cuda_arrays.py
@@ -17,6 +17,12 @@ def host_empty(shape, dtype = 'float', order = 'C'):
     shape : tuple of int or int
         The shape of the array.
 
+    dtype : str, optional
+        The data type of the array. The default is 'float'.
+
+    order : str, optional
+        The order of the array. The default is 'C'.
+
     Returns
     -------
     array

From cc5a8cfe6f3cc89766e92cfb591ca31a8e77208b Mon Sep 17 00:00:00 2001
From: EmilyBourne <louise.bourne@gmail.com>
Date: Mon, 11 Mar 2024 11:41:27 +0100
Subject: [PATCH 123/130] Trigger tests on push to devel or main branch

---
 .github/workflows/anaconda_linux.yml   | 2 +-
 .github/workflows/anaconda_windows.yml | 2 +-
 .github/workflows/intel.yml            | 2 +-
 .github/workflows/linux.yml            | 2 +-
 .github/workflows/macosx.yml           | 2 +-
 .github/workflows/pickle.yml           | 2 +-
 .github/workflows/pickle_wheel.yml     | 2 +-
 .github/workflows/windows.yml          | 2 +-
 8 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/.github/workflows/anaconda_linux.yml b/.github/workflows/anaconda_linux.yml
index 5a5384e5ce..525903a54f 100644
--- a/.github/workflows/anaconda_linux.yml
+++ b/.github/workflows/anaconda_linux.yml
@@ -28,7 +28,7 @@ env:
 jobs:
   Python_version_picker:
     runs-on: ubuntu-latest
-    if: github.event_name != 'push' || github.repository == 'pyccel/pyccel'
+    if: github.event_name != 'push' || github.repository == 'pyccel/pyccel-cuda'
     outputs:
       python_version: ${{ steps.set-python_version.outputs.python_version }}
     steps:
diff --git a/.github/workflows/anaconda_windows.yml b/.github/workflows/anaconda_windows.yml
index 154a4d01e8..0f3f8a04ed 100644
--- a/.github/workflows/anaconda_windows.yml
+++ b/.github/workflows/anaconda_windows.yml
@@ -28,7 +28,7 @@ env:
 jobs:
   Python_version_picker:
     runs-on: windows-latest
-    if: github.event_name != 'push' || github.repository == 'pyccel/pyccel'
+    if: github.event_name != 'push' || github.repository == 'pyccel/pyccel-cuda'
     outputs:
       python_version: ${{ steps.set-python_version.outputs.python_version }}
     steps:
diff --git a/.github/workflows/intel.yml b/.github/workflows/intel.yml
index 977d5f9afd..5f340e1088 100644
--- a/.github/workflows/intel.yml
+++ b/.github/workflows/intel.yml
@@ -29,7 +29,7 @@ env:
 jobs:
   Python_version_picker:
     runs-on: ubuntu-latest
-    if: github.event_name != 'push' || github.repository == 'pyccel/pyccel'
+    if: github.event_name != 'push' || github.repository == 'pyccel/pyccel-cuda'
     outputs:
       python_version: ${{ steps.set-python_version.outputs.python_version }}
     steps:
diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml
index ad39cee725..664ae3aa60 100644
--- a/.github/workflows/linux.yml
+++ b/.github/workflows/linux.yml
@@ -28,7 +28,7 @@ env:
 jobs:
   matrix_prep:
     runs-on: ubuntu-latest
-    if: github.event_name != 'push' || github.repository == 'pyccel/pyccel'
+    if: github.event_name != 'push' || github.repository == 'pyccel/pyccel-cuda'
     outputs:
       matrix: ${{ steps.set-matrix.outputs.matrix }}
     steps:
diff --git a/.github/workflows/macosx.yml b/.github/workflows/macosx.yml
index 4768a64efa..f51041c0b8 100644
--- a/.github/workflows/macosx.yml
+++ b/.github/workflows/macosx.yml
@@ -28,7 +28,7 @@ env:
 jobs:
   Python_version_picker:
     runs-on: macos-latest
-    if: github.event_name != 'push' || github.repository == 'pyccel/pyccel'
+    if: github.event_name != 'push' || github.repository == 'pyccel/pyccel-cuda'
     outputs:
       python_version: ${{ steps.set-python_version.outputs.python_version }}
     steps:
diff --git a/.github/workflows/pickle.yml b/.github/workflows/pickle.yml
index 052028a5cb..cc3864afd2 100644
--- a/.github/workflows/pickle.yml
+++ b/.github/workflows/pickle.yml
@@ -31,7 +31,7 @@ env:
 jobs:
   Python_version_picker:
     runs-on: ubuntu-latest
-    if: github.event_name != 'push' || github.repository == 'pyccel/pyccel'
+    if: github.event_name != 'push' || github.repository == 'pyccel/pyccel-cuda'
     outputs:
       python_version: ${{ steps.set-matrix.outputs.python_version }}
       matrix: ${{ steps.set-matrix.outputs.matrix }}
diff --git a/.github/workflows/pickle_wheel.yml b/.github/workflows/pickle_wheel.yml
index 1dc82af503..718dc13dcc 100644
--- a/.github/workflows/pickle_wheel.yml
+++ b/.github/workflows/pickle_wheel.yml
@@ -28,7 +28,7 @@ env:
 jobs:
   Python_version_picker:
     runs-on: ubuntu-latest
-    if: github.event_name != 'push' || github.repository == 'pyccel/pyccel'
+    if: github.event_name != 'push' || github.repository == 'pyccel/pyccel-cuda'
     outputs:
       python_version: ${{ steps.set-python_version.outputs.python_version }}
     steps:
diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml
index 60c560ffee..827038a279 100644
--- a/.github/workflows/windows.yml
+++ b/.github/workflows/windows.yml
@@ -28,7 +28,7 @@ env:
 jobs:
   Python_version_picker:
     runs-on: windows-latest
-    if: github.event_name != 'push' || github.repository == 'pyccel/pyccel'
+    if: github.event_name != 'push' || github.repository == 'pyccel/pyccel-cuda'
     outputs:
       python_version: ${{ steps.set-python_version.outputs.python_version }}
     steps:

From a822c41f434fa9bec4fbf51b239787e29d0eef1e Mon Sep 17 00:00:00 2001
From: EmilyBourne <louise.bourne@gmail.com>
Date: Mon, 11 Mar 2024 11:46:33 +0100
Subject: [PATCH 124/130] Add cuda workflow to test cuda developments on CI

---
 .github/actions/coverage_install/action.yml |  2 +-
 .github/actions/linux_install/action.yml    | 10 +--
 .github/actions/pytest_run/action.yml       |  4 +-
 .github/actions/pytest_run_cuda/action.yml  | 17 +++++
 .github/actions/python_install/action.yml   | 17 +++++
 .github/workflows/cuda.yml                  | 83 +++++++++++++++++++++
 ci_tools/bot_messages/show_tests.txt        |  1 +
 ci_tools/bot_tools/bot_funcs.py             | 12 +--
 ci_tools/devel_branch_tests.py              |  1 +
 ci_tools/json_pytest_output.py              |  2 +-
 10 files changed, 135 insertions(+), 14 deletions(-)
 create mode 100644 .github/actions/pytest_run_cuda/action.yml
 create mode 100644 .github/actions/python_install/action.yml
 create mode 100644 .github/workflows/cuda.yml

diff --git a/.github/actions/coverage_install/action.yml b/.github/actions/coverage_install/action.yml
index ac5294e542..5732baee34 100644
--- a/.github/actions/coverage_install/action.yml
+++ b/.github/actions/coverage_install/action.yml
@@ -15,7 +15,7 @@ runs:
     - name: Directory Creation
       run: |
         INSTALL_DIR=$(cd tests; python -c "import pyccel; print(pyccel.__path__[0])")
-        SITE_DIR=$(python -c 'import sysconfig; print(sysconfig.get_paths()["purelib"])')
+        SITE_DIR=$(dirname ${INSTALL_DIR})
         echo -e "import coverage; coverage.process_startup()" > ${SITE_DIR}/pyccel_cov.pth
         echo -e "[run]\nparallel = True\nsource = ${INSTALL_DIR}\ndata_file = $(pwd)/.coverage\n[report]\ninclude = ${INSTALL_DIR}/*\n[xml]\noutput = cobertura.xml" > .coveragerc
         echo "SITE_DIR=${SITE_DIR}" >> $GITHUB_ENV
diff --git a/.github/actions/linux_install/action.yml b/.github/actions/linux_install/action.yml
index 8fb5cd8505..0ef9a69b8e 100644
--- a/.github/actions/linux_install/action.yml
+++ b/.github/actions/linux_install/action.yml
@@ -9,22 +9,22 @@ runs:
       shell: bash
     - name: Install fortran
       run:
-        sudo apt-get install gfortran
+        sudo apt-get install -y gfortran
       shell: bash
     - name: Install LaPack
       run:
-        sudo apt-get install libblas-dev liblapack-dev
+        sudo apt-get install -y libblas-dev liblapack-dev
       shell: bash
     - name: Install MPI
       run: |
-        sudo apt-get install libopenmpi-dev openmpi-bin
+        sudo apt-get install -y libopenmpi-dev openmpi-bin
         echo "MPI_OPTS=--oversubscribe" >> $GITHUB_ENV
       shell: bash
     - name: Install OpenMP
       run:
-        sudo apt-get install libomp-dev libomp5
+        sudo apt-get install -y libomp-dev libomp5
       shell: bash
     - name: Install Valgrind
       run:
-        sudo apt-get install valgrind
+        sudo apt-get install -y valgrind
       shell: bash
diff --git a/.github/actions/pytest_run/action.yml b/.github/actions/pytest_run/action.yml
index 0b6f0f988d..b0bdc31f16 100644
--- a/.github/actions/pytest_run/action.yml
+++ b/.github/actions/pytest_run/action.yml
@@ -51,13 +51,13 @@ runs:
       working-directory: ./tests
       id: pytest_3
     - name: Test Fortran translations
-      run: python -m pytest -n auto -rX ${FLAGS} -m "not (parallel or xdist_incompatible) and not (c or python) ${{ inputs.pytest_mark }}" --ignore=ndarrays 2>&1 | tee s4_outfile.out
+      run: python -m pytest -n auto -rX ${FLAGS} -m "not (parallel or xdist_incompatible) and not (c or python or ccuda) ${{ inputs.pytest_mark }}" --ignore=ndarrays 2>&1 | tee s4_outfile.out
       shell: ${{ inputs.shell_cmd }}
       working-directory: ./tests
       id: pytest_4
     - name: Test multi-file Fortran translations
       run: |
-        python -m pytest -rX ${FLAGS} -m "xdist_incompatible and not parallel and not (c or python) ${{ inputs.pytest_mark }}" --ignore=ndarrays 2>&1 | tee s5_outfile.out
+        python -m pytest -rX ${FLAGS} -m "xdist_incompatible and not parallel and not (c or python or ccuda) ${{ inputs.pytest_mark }}" --ignore=ndarrays 2>&1 | tee s5_outfile.out
         pyccel-clean
       shell: ${{ inputs.shell_cmd }}
       working-directory: ./tests
diff --git a/.github/actions/pytest_run_cuda/action.yml b/.github/actions/pytest_run_cuda/action.yml
new file mode 100644
index 0000000000..52092a6e02
--- /dev/null
+++ b/.github/actions/pytest_run_cuda/action.yml
@@ -0,0 +1,17 @@
+name: 'Pyccel pytest commands generating Ccuda'
+inputs:
+  shell_cmd:
+    description: 'Specifies the shell command (different for anaconda)'
+    required: false
+    default: "bash"
+
+runs:
+  using: "composite"
+  steps:
+    - name: Ccuda tests with pytest
+      run: |
+        # Catch exit 5 (no tests found)
+        sh -c 'python -m pytest -n auto -rx -m "not (parallel or xdist_incompatible) and ccuda" --ignore=symbolic --ignore=ndarrays; ret=$?; [ $ret = 5 ] && exit 0 || exit $ret'
+        pyccel-clean
+      shell: ${{ inputs.shell_cmd }}
+      working-directory: ./tests
diff --git a/.github/actions/python_install/action.yml b/.github/actions/python_install/action.yml
new file mode 100644
index 0000000000..f9b720e3e1
--- /dev/null
+++ b/.github/actions/python_install/action.yml
@@ -0,0 +1,17 @@
+name: 'Python installation commands'
+
+runs:
+  using: "composite"
+  steps:
+    - name: Install python
+      run:
+        sudo apt-get -y install python3-dev
+      shell: bash
+    - name: python as python3
+      run:
+        sudo apt-get -y install python-is-python3
+      shell: bash
+    - name: Install Pip
+      run:
+        sudo apt-get -y install python3-pip
+      shell: bash
diff --git a/.github/workflows/cuda.yml b/.github/workflows/cuda.yml
new file mode 100644
index 0000000000..833ebf5d85
--- /dev/null
+++ b/.github/workflows/cuda.yml
@@ -0,0 +1,83 @@
+name: Cuda unit tests
+
+on:
+  workflow_dispatch:
+    inputs:
+      python_version:
+        required: false
+        type: string
+      ref:
+        required: false
+        type: string
+      check_run_id:
+        required: false
+        type: string
+      pr_repo:
+        required: false
+        type: string
+  push:
+    branches: [devel, main]
+
+env:
+  COMMIT: ${{ inputs.ref || github.event.ref }}
+  PEM: ${{ secrets.BOT_PEM }}
+  GITHUB_RUN_ID: ${{ github.run_id }}
+  GITHUB_CHECK_RUN_ID: ${{ inputs.check_run_id }}
+  PR_REPO: ${{ inputs.pr_repo || github.repository }}
+
+jobs:
+  Cuda:
+
+    runs-on: ubuntu-20.04
+    name: Unit tests
+
+    container: nvidia/cuda:11.7.1-devel-ubuntu20.04
+    steps:
+      - uses: actions/checkout@v3
+        with:
+          ref: ${{ env.COMMIT }}
+          repository: ${{ env.PR_REPO }}
+      - name: Prepare docker
+        run: |
+          apt update && apt install sudo
+          TZ=Europe/France
+          ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
+          DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends tzdata
+        shell: bash
+      - name: Install python (setup-python action doesn't work with containers)
+        uses: ./.github/actions/python_install
+      - name: "Setup"
+        id: token
+        run: |
+          pip install jwt requests
+          python ci_tools/setup_check_run.py cuda
+      - name: CUDA Version
+        run: nvcc --version # cuda install check
+      - name: Install dependencies
+        uses: ./.github/actions/linux_install
+      - name: Install Pyccel with tests
+        run: |
+            PATH=${PATH}:$HOME/.local/bin
+            echo "PATH=${PATH}" >> $GITHUB_ENV
+            python -m pip install --upgrade pip
+            python -m pip install --user .[test]
+        shell: bash
+      - name: Coverage install
+        uses: ./.github/actions/coverage_install
+      - name: Ccuda tests with pytest
+        id: cuda_pytest
+        uses: ./.github/actions/pytest_run_cuda
+      - name: Collect coverage information
+        continue-on-error: True
+        uses: ./.github/actions/coverage_collection
+      - name: Save code coverage report
+        uses: actions/upload-artifact@v3
+        with:
+          name: coverage-artifact
+          path: .coverage
+          retention-days: 1
+      - name: "Post completed"
+        if: always()
+        run:
+          python ci_tools/complete_check_run.py ${{ steps.cuda_pytest.outcome }}
+
diff --git a/ci_tools/bot_messages/show_tests.txt b/ci_tools/bot_messages/show_tests.txt
index adc07e8431..eb15492d2e 100644
--- a/ci_tools/bot_messages/show_tests.txt
+++ b/ci_tools/bot_messages/show_tests.txt
@@ -2,6 +2,7 @@ The following is a list of keywords which can be used to run tests. Tests in bol
 - **linux** : Runs the unit tests on a Linux system.
 - **windows** : Runs the unit tests on a Windows system.
 - **macosx** : Runs the unit tests on a MacOS X system.
+- **cuda** : Runs the cuda unit tests on a Linux system.
 - **coverage** : Runs the unit tests on a Linux system and checks the coverage of the tests.
 - **docs** : Checks if the documentation follows the numpydoc format.
 - **pylint** : Runs pylint on files which are too big to be handled by codacy.
diff --git a/ci_tools/bot_tools/bot_funcs.py b/ci_tools/bot_tools/bot_funcs.py
index 7084a01bb9..1621d1d089 100644
--- a/ci_tools/bot_tools/bot_funcs.py
+++ b/ci_tools/bot_tools/bot_funcs.py
@@ -23,7 +23,8 @@
         'pyccel_lint': '3.8',
         'pylint': '3.8',
         'spelling': '3.8',
-        'windows': '3.8'
+        'windows': '3.8',
+        'cuda': '-'
         }
 
 test_names = {
@@ -40,15 +41,16 @@
         'pyccel_lint': "Pyccel best practices",
         'pylint': "Python linting",
         'spelling': "Spelling verification",
-        'windows': "Unit tests on Windows"
+        'windows': "Unit tests on Windows",
+        'cuda': "Unit tests on Linux with cuda"
         }
 
-test_dependencies = {'coverage':['linux']}
+test_dependencies = {'coverage':['linux', 'cuda']}
 
 tests_with_base = ('coverage', 'docs', 'pyccel_lint', 'pylint')
 
 pr_test_keys = ('linux', 'windows', 'macosx', 'coverage', 'docs', 'pylint',
-                'pyccel_lint', 'spelling')
+                'pyccel_lint', 'spelling', 'cuda')
 
 review_stage_labels = ["needs_initial_review", "Ready_for_review", "Ready_to_merge"]
 
@@ -420,7 +422,7 @@ def is_test_required(self, commit_log, name, key, state):
             True if the test should be run, False otherwise.
         """
         print("Checking : ", name, key)
-        if key in ('linux', 'windows', 'macosx', 'anaconda_linux', 'anaconda_windows', 'intel'):
+        if key in ('linux', 'windows', 'macosx', 'anaconda_linux', 'anaconda_windows', 'intel', 'cuda'):
             has_relevant_change = lambda diff: any((f.startswith('pyccel/') or f.startswith('tests/')) #pylint: disable=unnecessary-lambda-assignment
                                                     and f.endswith('.py') and f != 'pyccel/version.py'
                                                     for f in diff)
diff --git a/ci_tools/devel_branch_tests.py b/ci_tools/devel_branch_tests.py
index 1102ef9e92..ec67b6c49a 100644
--- a/ci_tools/devel_branch_tests.py
+++ b/ci_tools/devel_branch_tests.py
@@ -15,3 +15,4 @@
     bot.run_tests(['anaconda_linux'], '3.10', force_run = True)
     bot.run_tests(['anaconda_windows'], '3.10', force_run = True)
     bot.run_tests(['intel'], '3.9', force_run = True)
+    bot.run_tests(['cuda'], '-', force_run = True)
diff --git a/ci_tools/json_pytest_output.py b/ci_tools/json_pytest_output.py
index 409ae76d72..b84f4a4c09 100644
--- a/ci_tools/json_pytest_output.py
+++ b/ci_tools/json_pytest_output.py
@@ -61,7 +61,7 @@ def     mini_md_summary(title, outcome, failed_tests):
     summary = ""
 
     failed_pattern = re.compile(r".*FAILED.*")
-    languages = ('c', 'fortran', 'python')
+    languages = ('c', 'fortran', 'python', 'cuda')
     pattern = {lang: re.compile(r".*\["+lang+r"\]\ \_.*") for lang in languages}
 
     for i in p_args.tests:

From 99b1838df30aec3273a358edc6a9f79ee0403607 Mon Sep 17 00:00:00 2001
From: EmilyBourne <louise.bourne@gmail.com>
Date: Mon, 11 Mar 2024 11:41:27 +0100
Subject: [PATCH 125/130] Trigger tests on push to devel or main branch

---
 .github/workflows/deploy.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml
index 920b14cf0b..391511329f 100644
--- a/.github/workflows/deploy.yml
+++ b/.github/workflows/deploy.yml
@@ -10,7 +10,7 @@ jobs:
   waitForWorklows:
     name: Wait for workflows
     runs-on: ubuntu-latest
-    if: github.event.workflow_run.head_branch == 'main'
+    if: github.event.workflow_run.head_branch == 'main' && github.repository == 'pyccel/pyccel'
     steps:
       - name: Checkout repository
         uses: actions/checkout@v4

From 8828e56df84b16f212af582bdc040b4c84c23770 Mon Sep 17 00:00:00 2001
From: bauom <40796259+bauom@users.noreply.github.com>
Date: Wed, 28 Feb 2024 18:11:50 +0100
Subject: [PATCH 126/130] [init] Adding CUDA language/compiler and CodePrinter
 (#32)

This PR makes the generated C code compilable with nvcc. It adds the `cuda` language along with a `CudaCodePrinter`.

Changes to stdlib:

Wrapped expressions using complex types in an `#ifndef __NVCC__` guard so that they are not processed by the nvcc compiler.

---------

Co-authored-by: Mouad Elalj, EmilyBourne
---
 .dict_custom.txt                           |   1 +
 .github/actions/pytest_parallel/action.yml |   4 +-
 .github/actions/pytest_run/action.yml      |   4 +-
 .github/actions/pytest_run_cuda/action.yml |  11 +-
 CHANGELOG.md                               |   6 +
 pyccel/codegen/codegen.py                  |   8 +-
 pyccel/codegen/compiling/compilers.py      |   5 +-
 pyccel/codegen/pipeline.py                 |   5 +-
 pyccel/codegen/printing/cucode.py          |  74 +++++++++++
 pyccel/commands/console.py                 |   2 +-
 pyccel/compilers/default_compilers.py      |  13 +-
 pyccel/naming/__init__.py                  |   4 +-
 pyccel/naming/cudanameclashchecker.py      |  92 ++++++++++++++
 pyccel/stdlib/numpy/numpy_c.c              |   2 +
 pyccel/stdlib/numpy/numpy_c.h              |   2 +
 pytest.ini                                 |   1 +
 tests/conftest.py                          |  11 ++
 tests/epyccel/test_base.py                 | 136 ++++++++++-----------
 18 files changed, 298 insertions(+), 83 deletions(-)
 create mode 100644 pyccel/codegen/printing/cucode.py
 create mode 100644 pyccel/naming/cudanameclashchecker.py

diff --git a/.dict_custom.txt b/.dict_custom.txt
index b9240f6215..161337d33b 100644
--- a/.dict_custom.txt
+++ b/.dict_custom.txt
@@ -110,6 +110,7 @@ Valgrind
 variadic
 subclasses
 oneAPI
+Cuda
 getter
 setter
 bitwise
diff --git a/.github/actions/pytest_parallel/action.yml b/.github/actions/pytest_parallel/action.yml
index c7c77d99c7..f91d84915b 100644
--- a/.github/actions/pytest_parallel/action.yml
+++ b/.github/actions/pytest_parallel/action.yml
@@ -10,8 +10,8 @@ runs:
   steps:
     - name: Test with pytest
       run: |
-        mpiexec -n 4 ${MPI_OPTS} python -m pytest epyccel/test_parallel_epyccel.py -v -m parallel -rXx
-        #mpiexec -n 4 ${MPI_OPTS} python -m pytest epyccel -v -m parallel -rXx
+        mpiexec -n 4 ${MPI_OPTS} python -m pytest epyccel/test_parallel_epyccel.py -v -m "parallel and not cuda" -rXx
+        #mpiexec -n 4 ${MPI_OPTS} python -m pytest epyccel -v -m "parallel and not cuda" -rXx
       shell: ${{ inputs.shell_cmd }}
       working-directory: ./tests
 
diff --git a/.github/actions/pytest_run/action.yml b/.github/actions/pytest_run/action.yml
index b0bdc31f16..451fa39e92 100644
--- a/.github/actions/pytest_run/action.yml
+++ b/.github/actions/pytest_run/action.yml
@@ -51,13 +51,13 @@ runs:
       working-directory: ./tests
       id: pytest_3
     - name: Test Fortran translations
-      run: python -m pytest -n auto -rX ${FLAGS} -m "not (parallel or xdist_incompatible) and not (c or python or ccuda) ${{ inputs.pytest_mark }}" --ignore=ndarrays 2>&1 | tee s4_outfile.out
+      run: python -m pytest -n auto -rX ${FLAGS} -m "not (parallel or xdist_incompatible) and not (c or python or cuda) ${{ inputs.pytest_mark }}" --ignore=ndarrays 2>&1 | tee s4_outfile.out
       shell: ${{ inputs.shell_cmd }}
       working-directory: ./tests
       id: pytest_4
     - name: Test multi-file Fortran translations
       run: |
-        python -m pytest -rX ${FLAGS} -m "xdist_incompatible and not parallel and not (c or python or ccuda) ${{ inputs.pytest_mark }}" --ignore=ndarrays 2>&1 | tee s5_outfile.out
+        python -m pytest -rX ${FLAGS} -m "xdist_incompatible and not parallel and not (c or python or cuda) ${{ inputs.pytest_mark }}" --ignore=ndarrays 2>&1 | tee s5_outfile.out
         pyccel-clean
       shell: ${{ inputs.shell_cmd }}
       working-directory: ./tests
diff --git a/.github/actions/pytest_run_cuda/action.yml b/.github/actions/pytest_run_cuda/action.yml
index 52092a6e02..46f90552ed 100644
--- a/.github/actions/pytest_run_cuda/action.yml
+++ b/.github/actions/pytest_run_cuda/action.yml
@@ -1,4 +1,4 @@
-name: 'Pyccel pytest commands generating Ccuda'
+name: 'Pyccel pytest commands generating Cuda'
 inputs:
   shell_cmd:
     description: 'Specifies the shell command (different for anaconda)'
@@ -11,7 +11,14 @@ runs:
     - name: Ccuda tests with pytest
       run: |
         # Catch exit 5 (no tests found)
-        sh -c 'python -m pytest -n auto -rx -m "not (parallel or xdist_incompatible) and ccuda" --ignore=symbolic --ignore=ndarrays; ret=$?; [ $ret = 5 ] && exit 0 || exit $ret'
+        python -m pytest -rX ${FLAGS} -m "not (xdist_incompatible or parallel) and cuda ${{ inputs.pytest_mark }}" --ignore=symbolic --ignore=ndarrays 2>&1 | tee s1_outfile.out
         pyccel-clean
       shell: ${{ inputs.shell_cmd }}
       working-directory: ./tests
+    - name: Final step
+      if: always()
+      id: status
+      run:
+        python ci_tools/json_pytest_output.py -t "Cuda Test Summary" --tests "Cuda tests:${{ steps.pytest_1.outcome }}:tests/s1_outfile.out"
+                      
+      shell: ${{ inputs.shell_cmd }}
diff --git a/CHANGELOG.md b/CHANGELOG.md
index a5f103a85f..1825bf5cbd 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,6 +1,12 @@
 # Change Log
 All notable changes to this project will be documented in this file.
 
+## \[Cuda - UNRELEASED\]
+
+### Added
+
+-   #32 : add support for `nvcc` Compiler and `cuda` language as a possible option.
+
 ## \[UNRELEASED\]
 
 ### Added
diff --git a/pyccel/codegen/codegen.py b/pyccel/codegen/codegen.py
index a7a02d7804..33721a48e8 100644
--- a/pyccel/codegen/codegen.py
+++ b/pyccel/codegen/codegen.py
@@ -9,16 +9,18 @@
 from pyccel.codegen.printing.fcode  import FCodePrinter
 from pyccel.codegen.printing.ccode  import CCodePrinter
 from pyccel.codegen.printing.pycode import PythonCodePrinter
+from pyccel.codegen.printing.cucode import CudaCodePrinter
 
 from pyccel.ast.core      import FunctionDef, Interface, ModuleHeader
 from pyccel.utilities.stage import PyccelStage
 
-_extension_registry = {'fortran': 'f90', 'c':'c',  'python':'py'}
-_header_extension_registry = {'fortran': None, 'c':'h',  'python':None}
+_extension_registry = {'fortran': 'f90', 'c':'c',  'python':'py', 'cuda':'cu'}
+_header_extension_registry = {'fortran': None, 'c':'h',  'python':None, 'cuda':'h'}
 printer_registry    = {
                         'fortran':FCodePrinter,
                         'c':CCodePrinter,
-                        'python':PythonCodePrinter
+                        'python':PythonCodePrinter,
+                        'cuda':CudaCodePrinter
                       }
 
 pyccel_stage = PyccelStage()
diff --git a/pyccel/codegen/compiling/compilers.py b/pyccel/codegen/compiling/compilers.py
index c866ee5b1a..d909a5036e 100644
--- a/pyccel/codegen/compiling/compilers.py
+++ b/pyccel/codegen/compiling/compilers.py
@@ -444,7 +444,10 @@ def compile_shared_library(self, compile_obj, output_folder, verbose = False, sh
         # Collect compile information
         exec_cmd, includes, libs_flags, libdirs_flags, m_code = \
                 self._get_compile_components(compile_obj, accelerators)
-        linker_libdirs_flags = ['-Wl,-rpath' if l == '-L' else l for l in libdirs_flags]
+        if self._info['exec'] == 'nvcc':
+            linker_libdirs_flags = ['-Xcompiler' if l == '-L' else f'"-Wl,-rpath,{l}"' for l in libdirs_flags]
+        else:
+            linker_libdirs_flags = ['-Wl,-rpath' if l == '-L' else l for l in libdirs_flags]
 
         flags.insert(0,"-shared")
 
diff --git a/pyccel/codegen/pipeline.py b/pyccel/codegen/pipeline.py
index 14087fb567..eb357fab74 100644
--- a/pyccel/codegen/pipeline.py
+++ b/pyccel/codegen/pipeline.py
@@ -180,9 +180,10 @@ def handle_error(stage):
     if language is None:
         language = 'fortran'
 
-    # Choose Fortran compiler
+    # Choose Default compiler
     if compiler is None:
-        compiler = os.environ.get('PYCCEL_DEFAULT_COMPILER', 'GNU')
+        default_compiler_family = 'nvidia' if language == 'cuda' else 'GNU'
+        compiler = os.environ.get('PYCCEL_DEFAULT_COMPILER', default_compiler_family)
 
     fflags = [] if fflags is None else fflags.split()
     wrapper_flags = [] if wrapper_flags is None else wrapper_flags.split()
diff --git a/pyccel/codegen/printing/cucode.py b/pyccel/codegen/printing/cucode.py
new file mode 100644
index 0000000000..86146b065b
--- /dev/null
+++ b/pyccel/codegen/printing/cucode.py
@@ -0,0 +1,74 @@
+# coding: utf-8
+#------------------------------------------------------------------------------------------#
+# This file is part of Pyccel which is released under MIT License. See the LICENSE file or #
+# go to https://github.com/pyccel/pyccel/blob/master/LICENSE for full license details.     #
+#------------------------------------------------------------------------------------------#
+"""
+Provide tools for generating and handling CUDA code.
+This module is designed to interface Pyccel's Abstract Syntax Tree (AST) with CUDA,
+enabling the direct translation of high-level Pyccel expressions into CUDA code.
+"""
+
+from pyccel.codegen.printing.ccode import CCodePrinter, c_library_headers
+
+from pyccel.ast.core        import Import, Module
+
+from pyccel.errors.errors   import Errors
+
+
+errors = Errors()
+
+__all__ = ["CudaCodePrinter"]
+
+class CudaCodePrinter(CCodePrinter):
+    """
+    Print code in CUDA format.
+
+    This printer converts Pyccel's Abstract Syntax Tree (AST) into strings of CUDA code.
+    Navigation through this file utilizes _print_X functions,
+    as is common with all printers.
+
+    Parameters
+    ----------
+    filename : str
+            The name of the file being pyccelised.
+    prefix_module : str
+            A prefix to be added to the name of the module.
+    """
+    language = "cuda"
+
+    def __init__(self, filename, prefix_module = None):
+
+        errors.set_target(filename)
+
+        super().__init__(filename)
+
+    def _print_Module(self, expr):
+        self.set_scope(expr.scope)
+        self._current_module = expr.name
+        body = ''.join(self._print(i) for i in expr.body)
+
+        global_variables = ''.join(self._print(d) for d in expr.declarations)
+
+        # Print imports last to be sure that all additional_imports have been collected
+        imports = [Import(expr.name, Module(expr.name,(),())), *self._additional_imports.values()]
+        c_headers_imports = ''
+        local_imports = ''
+
+        for imp in imports:
+            if imp.source in c_library_headers:
+                c_headers_imports += self._print(imp)
+            else:
+                local_imports += self._print(imp)
+
+        imports = f'{c_headers_imports}\
+                    extern "C"{{\n\
+                    {local_imports}\
+                    }}'
+
+        code = f'{imports}\n\
+                 {global_variables}\n\
+                 {body}\n'
+
+        self.exit_scope()
+        return code
diff --git a/pyccel/commands/console.py b/pyccel/commands/console.py
index 596c440ec0..fcbec009de 100644
--- a/pyccel/commands/console.py
+++ b/pyccel/commands/console.py
@@ -80,7 +80,7 @@ def pyccel(files=None, mpi=None, openmp=None, openacc=None, output_dir=None, com
     # ... backend compiler options
     group = parser.add_argument_group('Backend compiler options')
 
-    group.add_argument('--language', choices=('fortran', 'c', 'python'), help='Generated language')
+    group.add_argument('--language', choices=('fortran', 'c', 'python', 'cuda'), help='Generated language')
 
     group.add_argument('--compiler', help='Compiler family or json file containing a compiler description {GNU,intel,PGI}')
 
diff --git a/pyccel/compilers/default_compilers.py b/pyccel/compilers/default_compilers.py
index 166085d22e..d47856773c 100644
--- a/pyccel/compilers/default_compilers.py
+++ b/pyccel/compilers/default_compilers.py
@@ -185,6 +185,15 @@
                 },
             'family': 'nvidia',
             }
+#------------------------------------------------------------
+nvcc_info = {'exec'         : 'nvcc',
+             'language'     : 'cuda',
+             'debug_flags'  : ("-g",),
+             'release_flags': ("-O3",),
+             'general_flags': ('--compiler-options', '-fPIC',),
+             'family'       : 'nvidia'
+            }
+
 
 #------------------------------------------------------------
 def change_to_lib_flag(lib):
@@ -288,6 +297,7 @@ def change_to_lib_flag(lib):
 pgfortran_info.update(python_info)
 nvc_info.update(python_info)
 nvfort_info.update(python_info)
+nvcc_info.update(python_info)
 
 available_compilers = {('GNU', 'c') : gcc_info,
                        ('GNU', 'fortran') : gfort_info,
@@ -296,6 +306,7 @@ def change_to_lib_flag(lib):
                        ('PGI', 'c') : pgcc_info,
                        ('PGI', 'fortran') : pgfortran_info,
                        ('nvidia', 'c') : nvc_info,
-                       ('nvidia', 'fortran') : nvfort_info}
+                       ('nvidia', 'fortran') : nvfort_info,
+                       ('nvidia', 'cuda'): nvcc_info}
 
 vendors = ('GNU','intel','PGI','nvidia')
diff --git a/pyccel/naming/__init__.py b/pyccel/naming/__init__.py
index 72c318d3ad..b3e4bbbe0e 100644
--- a/pyccel/naming/__init__.py
+++ b/pyccel/naming/__init__.py
@@ -10,7 +10,9 @@
 from .fortrannameclashchecker import FortranNameClashChecker
 from .cnameclashchecker import CNameClashChecker
 from .pythonnameclashchecker import PythonNameClashChecker
+from .cudanameclashchecker import CudaNameClashChecker
 
 name_clash_checkers = {'fortran':FortranNameClashChecker(),
         'c':CNameClashChecker(),
-        'python':PythonNameClashChecker()}
+        'python':PythonNameClashChecker(),
+        'cuda':CudaNameClashChecker()}
diff --git a/pyccel/naming/cudanameclashchecker.py b/pyccel/naming/cudanameclashchecker.py
new file mode 100644
index 0000000000..971204e912
--- /dev/null
+++ b/pyccel/naming/cudanameclashchecker.py
@@ -0,0 +1,92 @@
+# coding: utf-8
+#------------------------------------------------------------------------------------------#
+# This file is part of Pyccel which is released under MIT License. See the LICENSE file or #
+# go to https://github.com/pyccel/pyccel/blob/master/LICENSE for full license details.     #
+#------------------------------------------------------------------------------------------#
+"""
+Handles name clash problems in Cuda
+"""
+from .languagenameclashchecker import LanguageNameClashChecker
+
+class CudaNameClashChecker(LanguageNameClashChecker):
+    """
+    Class containing functions to help avoid problematic names in Cuda.
+
+    A class which provides functionalities to check or propose variable names and
+    verify that they do not cause name clashes. Name clashes may be due to
+    new variables, or due to the use of reserved keywords.
+    """
+    # C keywords (https://en.cppreference.com/w/c/keyword) plus runtime helper names (e.g. isign/fsign/csign)
+    keywords = set(['isign', 'fsign', 'csign', 'auto', 'break', 'case', 'char', 'const',
+        'continue', 'default', 'do', 'double', 'else', 'enum',
+        'extern', 'float', 'for', 'goto', 'if', 'inline', 'int',
+        'long', 'register', 'restrict', 'return', 'short', 'signed',
+        'sizeof', 'static', 'struct', 'switch', 'typedef', 'union',
+        'unsigned', 'void', 'volatile', 'while', '_Alignas',
+        '_Alignof', '_Atomic', '_Bool', '_Complex', '_Decimal128',
+        '_Decimal32', '_Decimal64', '_Generic', '_Imaginary',
+        '_Noreturn', '_Static_assert', '_Thread_local', 't_ndarray',
+        'array_create', 'new_slice', 'array_slicing', 'alias_assign',
+        'transpose_alias_assign', 'array_fill', 't_slice',
+        'GET_INDEX_EXP1', 'GET_INDEX_EXP2',
+        'GET_INDEX_EXP3', 'GET_INDEX_EXP4', 'GET_INDEX_EXP5',
+        'GET_INDEX_EXP6', 'GET_INDEX_EXP7', 'GET_INDEX_EXP8',
+        'GET_INDEX_EXP9', 'GET_INDEX_EXP10', 'GET_INDEX_EXP11',
+        'GET_INDEX_EXP12', 'GET_INDEX_EXP13', 'GET_INDEX_EXP14',
+        'GET_INDEX_EXP15', 'NUM_ARGS_H1', 'NUM_ARGS',
+        'GET_INDEX_FUNC_H2', 'GET_INDEX_FUNC', 'GET_INDEX',
+        'INDEX', 'GET_ELEMENT', 'free_array', 'free_pointer',
+        'get_index', 'numpy_to_ndarray_strides',
+        'numpy_to_ndarray_shape', 'get_size', 'order_f', 'order_c', 'array_copy_data'])
+
+    def has_clash(self, name, symbols):
+        """
+        Indicate whether the proposed name causes any clashes.
+
+        Checks if a suggested name conflicts with predefined
+        keywords or specified symbols, returning true for a clash.
+        This method is crucial for maintaining namespace integrity and
+        preventing naming conflicts in code generation processes.
+
+        Parameters
+        ----------
+        name : str
+            The suggested name.
+        symbols : set
+            Symbols which should be considered as collisions.
+
+        Returns
+        -------
+        bool
+            True if the name is a collision.
+            False if the name is collision free.
+        """
+        return any(name == k for k in self.keywords) or \
+               any(name == s for s in symbols)
+
+    def get_collisionless_name(self, name, symbols):
+        """
+        Get a valid name which doesn't collide with symbols or Cuda keywords.
+
+        Find a new name based on the suggested name which will not cause
+        conflicts with Cuda keywords, does not appear in the provided symbols,
+        and is a valid name in Cuda code.
+
+        Parameters
+        ----------
+        name : str
+            The suggested name.
+        symbols : set
+            Symbols which should be considered as collisions.
+
+        Returns
+        -------
+        str
+            A new name which is collision free.
+        """
+        if len(name)>4 and all(name[i] == '_' for i in (0,1,-1,-2)):
+            # Ignore magic methods
+            return name
+        if name[0] == '_':
+            name = 'private'+name
+        return self._get_collisionless_name(name, symbols)
diff --git a/pyccel/stdlib/numpy/numpy_c.c b/pyccel/stdlib/numpy/numpy_c.c
index 7c9ecbbf6b..bc56214772 100644
--- a/pyccel/stdlib/numpy/numpy_c.c
+++ b/pyccel/stdlib/numpy/numpy_c.c
@@ -17,8 +17,10 @@ double  fsign(double x)
     return SIGN(x);
 }
 
+#ifndef __NVCC__
 /* numpy.sign for complex */
 double complex csign(double complex x)
 {
     return x ? ((!creal(x) && cimag(x) < 0) || (creal(x) < 0) ? -1 : 1) : 0;
 }
+#endif
diff --git a/pyccel/stdlib/numpy/numpy_c.h b/pyccel/stdlib/numpy/numpy_c.h
index e72cf3ad57..c2a16a5516 100644
--- a/pyccel/stdlib/numpy/numpy_c.h
+++ b/pyccel/stdlib/numpy/numpy_c.h
@@ -15,6 +15,8 @@
 
 long long int isign(long long int x);
 double fsign(double x);
+#ifndef __NVCC__
 double complex csign(double complex x);
+#endif
 
 #endif
diff --git a/pytest.ini b/pytest.ini
index 42eb0d72ba..3792ab65f9 100644
--- a/pytest.ini
+++ b/pytest.ini
@@ -9,3 +9,4 @@ markers =
     python: test to generate python code
     xdist_incompatible: test which compiles a file also compiled by another test
     external: test using an external dll (problematic with conda on Windows)
+    cuda: test to generate cuda code
diff --git a/tests/conftest.py b/tests/conftest.py
index 79144b6978..a5082ef6e8 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -21,6 +21,17 @@
 def language(request):
     return request.param
 
+@pytest.fixture( params=[
+        pytest.param("fortran", marks = pytest.mark.fortran),
+        pytest.param("c", marks = pytest.mark.c),
+        pytest.param("python", marks = pytest.mark.python),
+        pytest.param("cuda", marks = pytest.mark.cuda)
+    ],
+    scope = "session"
+)
+def language_with_cuda(request):
+    return request.param
+
 def move_coverage(path_dir):
     for root, _, files in os.walk(path_dir):
         for name in files:
diff --git a/tests/epyccel/test_base.py b/tests/epyccel/test_base.py
index c22064d321..413f79eef1 100644
--- a/tests/epyccel/test_base.py
+++ b/tests/epyccel/test_base.py
@@ -7,128 +7,128 @@
 from utilities import epyccel_test
 
 
-def test_is_false(language):
-    test = epyccel_test(base.is_false, lang=language)
+def test_is_false(language_with_cuda):
+    test = epyccel_test(base.is_false, lang=language_with_cuda)
     test.compare_epyccel( True )
     test.compare_epyccel( False )
 
-def test_is_true(language):
-    test = epyccel_test(base.is_true, lang=language)
+def test_is_true(language_with_cuda):
+    test = epyccel_test(base.is_true, lang=language_with_cuda)
     test.compare_epyccel( True )
     test.compare_epyccel( False )
 
-def test_compare_is(language):
-    test = epyccel_test(base.compare_is, lang=language)
+def test_compare_is(language_with_cuda):
+    test = epyccel_test(base.compare_is, lang=language_with_cuda)
     test.compare_epyccel( True, True )
     test.compare_epyccel( True, False )
     test.compare_epyccel( False, True )
     test.compare_epyccel( False, False )
 
-def test_compare_is_not(language):
-    test = epyccel_test(base.compare_is_not, lang=language)
+def test_compare_is_not(language_with_cuda):
+    test = epyccel_test(base.compare_is_not, lang=language_with_cuda)
     test.compare_epyccel( True, True )
     test.compare_epyccel( True, False )
     test.compare_epyccel( False, True )
     test.compare_epyccel( False, False )
 
-def test_compare_is_int(language):
-    test = epyccel_test(base.compare_is_int, lang=language)
+def test_compare_is_int(language_with_cuda):
+    test = epyccel_test(base.compare_is_int, lang=language_with_cuda)
     test.compare_epyccel( True, 1 )
     test.compare_epyccel( True, 0 )
     test.compare_epyccel( False, 1 )
     test.compare_epyccel( False, 0 )
 
-def test_compare_is_not_int(language):
-    test = epyccel_test(base.compare_is_not_int, lang=language)
+def test_compare_is_not_int(language_with_cuda):
+    test = epyccel_test(base.compare_is_not_int, lang=language_with_cuda)
     test.compare_epyccel( True, 1 )
     test.compare_epyccel( True, 0 )
     test.compare_epyccel( False, 1 )
     test.compare_epyccel( False, 0 )
 
-def test_not_false(language):
-    test = epyccel_test(base.not_false, lang=language)
+def test_not_false(language_with_cuda):
+    test = epyccel_test(base.not_false, lang=language_with_cuda)
     test.compare_epyccel( True )
     test.compare_epyccel( False )
 
-def test_not_true(language):
-    test = epyccel_test(base.not_true, lang=language)
+def test_not_true(language_with_cuda):
+    test = epyccel_test(base.not_true, lang=language_with_cuda)
     test.compare_epyccel( True )
     test.compare_epyccel( False )
 
-def test_eq_false(language):
-    test = epyccel_test(base.eq_false, lang=language)
+def test_eq_false(language_with_cuda):
+    test = epyccel_test(base.eq_false, lang=language_with_cuda)
     test.compare_epyccel( True )
     test.compare_epyccel( False )
 
-def test_eq_true(language):
-    test = epyccel_test(base.eq_true, lang=language)
+def test_eq_true(language_with_cuda):
+    test = epyccel_test(base.eq_true, lang=language_with_cuda)
     test.compare_epyccel( True )
     test.compare_epyccel( False )
 
-def test_neq_false(language):
-    test = epyccel_test(base.eq_false, lang=language)
+def test_neq_false(language_with_cuda):
+    test = epyccel_test(base.eq_false, lang=language_with_cuda)
     test.compare_epyccel( True )
     test.compare_epyccel( False )
 
-def test_neq_true(language):
-    test = epyccel_test(base.eq_true, lang=language)
+def test_neq_true(language_with_cuda):
+    test = epyccel_test(base.eq_true, lang=language_with_cuda)
     test.compare_epyccel( True )
     test.compare_epyccel( False )
 
-def test_not(language):
-    test = epyccel_test(base.not_val, lang=language)
+def test_not(language_with_cuda):
+    test = epyccel_test(base.not_val, lang=language_with_cuda)
     test.compare_epyccel( True )
     test.compare_epyccel( False )
 
-def test_not_int(language):
-    test = epyccel_test(base.not_int, lang=language)
+def test_not_int(language_with_cuda):
+    test = epyccel_test(base.not_int, lang=language_with_cuda)
     test.compare_epyccel( 0 )
     test.compare_epyccel( 4 )
 
-def test_compare_is_nil(language):
-    test = epyccel_test(base.is_nil, lang=language)
+def test_compare_is_nil(language_with_cuda):
+    test = epyccel_test(base.is_nil, lang=language_with_cuda)
     test.compare_epyccel( None )
 
-def test_compare_is_not_nil(language):
-    test = epyccel_test(base.is_not_nil, lang=language)
+def test_compare_is_not_nil(language_with_cuda):
+    test = epyccel_test(base.is_not_nil, lang=language_with_cuda)
     test.compare_epyccel( None )
 
-def test_cast_int(language):
-    test = epyccel_test(base.cast_int, lang=language)
+def test_cast_int(language_with_cuda):
+    test = epyccel_test(base.cast_int, lang=language_with_cuda)
     test.compare_epyccel( 4 )
-    test = epyccel_test(base.cast_float_to_int, lang=language)
+    test = epyccel_test(base.cast_float_to_int, lang=language_with_cuda)
     test.compare_epyccel( 4.5 )
 
-def test_cast_bool(language):
-    test = epyccel_test(base.cast_bool, lang=language)
+def test_cast_bool(language_with_cuda):
+    test = epyccel_test(base.cast_bool, lang=language_with_cuda)
     test.compare_epyccel( True )
 
-def test_cast_float(language):
-    test = epyccel_test(base.cast_float, lang=language)
+def test_cast_float(language_with_cuda):
+    test = epyccel_test(base.cast_float, lang=language_with_cuda)
     test.compare_epyccel( 4.5 )
-    test = epyccel_test(base.cast_int_to_float, lang=language)
+    test = epyccel_test(base.cast_int_to_float, lang=language_with_cuda)
     test.compare_epyccel( 4 )
 
-def test_if_0_int(language):
-    test = epyccel_test(base.if_0_int, lang=language)
+def test_if_0_int(language_with_cuda):
+    test = epyccel_test(base.if_0_int, lang=language_with_cuda)
     test.compare_epyccel( 22 )
     test.compare_epyccel( 0 )
 
-def test_if_0_real(language):
-    test = epyccel_test(base.if_0_real, lang=language)
+def test_if_0_real(language_with_cuda):
+    test = epyccel_test(base.if_0_real, lang=language_with_cuda)
     test.compare_epyccel( 22.3 )
     test.compare_epyccel( 0.0 )
 
-def test_same_int(language):
-    test = epyccel_test(base.is_same_int, lang=language)
+def test_same_int(language_with_cuda):
+    test = epyccel_test(base.is_same_int, lang=language_with_cuda)
     test.compare_epyccel( 22 )
-    test = epyccel_test(base.isnot_same_int, lang=language)
+    test = epyccel_test(base.isnot_same_int, lang=language_with_cuda)
     test.compare_epyccel( 22 )
 
-def test_same_float(language):
-    test = epyccel_test(base.is_same_float, lang=language)
+def test_same_float(language_with_cuda):
+    test = epyccel_test(base.is_same_float, lang=language_with_cuda)
     test.compare_epyccel( 22.2 )
-    test = epyccel_test(base.isnot_same_float, lang=language)
+    test = epyccel_test(base.isnot_same_float, lang=language_with_cuda)
     test.compare_epyccel( 22.2 )
 
 @pytest.mark.parametrize( 'language', [
@@ -150,28 +150,28 @@ def test_same_complex(language):
     test = epyccel_test(base.isnot_same_complex, lang=language)
     test.compare_epyccel( complex(2,3) )
 
-def test_is_types(language):
-    test = epyccel_test(base.is_types, lang=language)
+def test_is_types(language_with_cuda):
+    test = epyccel_test(base.is_types, lang=language_with_cuda)
     test.compare_epyccel( 1, 1.0 )
 
-def test_isnot_types(language):
-    test = epyccel_test(base.isnot_types, lang=language)
+def test_isnot_types(language_with_cuda):
+    test = epyccel_test(base.isnot_types, lang=language_with_cuda)
     test.compare_epyccel( 1, 1.0 )
 
-def test_none_is_none(language):
-    test = epyccel_test(base.none_is_none, lang=language)
+def test_none_is_none(language_with_cuda):
+    test = epyccel_test(base.none_is_none, lang=language_with_cuda)
     test.compare_epyccel()
 
-def test_none_isnot_none(language):
-    test = epyccel_test(base.none_isnot_none, lang=language)
+def test_none_isnot_none(language_with_cuda):
+    test = epyccel_test(base.none_isnot_none, lang=language_with_cuda)
     test.compare_epyccel()
 
-def test_pass_if(language):
-    test = epyccel_test(base.pass_if, lang=language)
+def test_pass_if(language_with_cuda):
+    test = epyccel_test(base.pass_if, lang=language_with_cuda)
     test.compare_epyccel(2)
 
-def test_pass2_if(language):
-    test = epyccel_test(base.pass2_if, lang=language)
+def test_pass2_if(language_with_cuda):
+    test = epyccel_test(base.pass2_if, lang=language_with_cuda)
     test.compare_epyccel(0.2)
     test.compare_epyccel(0.0)
 
@@ -192,15 +192,15 @@ def test_use_optional(language):
     test.compare_epyccel()
     test.compare_epyccel(6)
 
-def test_none_equality(language):
-    test = epyccel_test(base.none_equality, lang=language)
+def test_none_equality(language_with_cuda):
+    test = epyccel_test(base.none_equality, lang=language_with_cuda)
     test.compare_epyccel()
     test.compare_epyccel(6)
 
-def test_none_none_equality(language):
-    test = epyccel_test(base.none_none_equality, lang=language)
+def test_none_none_equality(language_with_cuda):
+    test = epyccel_test(base.none_none_equality, lang=language_with_cuda)
     test.compare_epyccel()
 
-def test_none_literal_equality(language):
-    test = epyccel_test(base.none_literal_equality, lang=language)
+def test_none_literal_equality(language_with_cuda):
+    test = epyccel_test(base.none_literal_equality, lang=language_with_cuda)
     test.compare_epyccel()

From 7ad90da333294bad9679efdd756b83114dccd369 Mon Sep 17 00:00:00 2001
From: Said Mazouz <95222894+smazouz42@users.noreply.github.com>
Date: Wed, 15 May 2024 12:58:50 +0100
Subject: [PATCH 127/130] Fix import handling (#49)

This pull request fixes https://github.com/pyccel/pyccel-cuda/issues/48 by implementing a small wrapper for CUDA and by wrapping only the non-CUDA functions with extern "C".

**Commit Summary**

-    Implemented new header printer for CUDA.
-    Added CUDA wrapper assignment
-    Instead of wrapping all local headers, wrap only C functions with extern "C".

---------

Co-authored-by: EmilyBourne <louise.bourne@gmail.com>
Co-authored-by: bauom <40796259+bauom@users.noreply.github.com>
---
 CHANGELOG.md                                |  3 +-
 pyccel/codegen/printing/cucode.py           | 45 ++++++++----
 pyccel/codegen/python_wrapper.py            |  4 ++
 pyccel/codegen/wrapper/cuda_to_c_wrapper.py | 78 +++++++++++++++++++++
 tests/epyccel/modules/cuda_module.py        | 13 ++++
 tests/epyccel/test_epyccel_modules.py       | 13 ++++
 6 files changed, 142 insertions(+), 14 deletions(-)
 create mode 100644 pyccel/codegen/wrapper/cuda_to_c_wrapper.py
 create mode 100644 tests/epyccel/modules/cuda_module.py

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 1825bf5cbd..ac1c913a26 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,7 +5,8 @@ All notable changes to this project will be documented in this file.
 
 ### Added
 
--   #32 : add support for `nvcc` Compiler and `cuda` language as a possible option.
+-   #32 : Add support for `nvcc` Compiler and `cuda` language as a possible option.
+-   #48 : Fix incorrect handling of imports in `cuda`.
 
 ## \[UNRELEASED\]
 
diff --git a/pyccel/codegen/printing/cucode.py b/pyccel/codegen/printing/cucode.py
index 86146b065b..277d2a3a6a 100644
--- a/pyccel/codegen/printing/cucode.py
+++ b/pyccel/codegen/printing/cucode.py
@@ -52,19 +52,7 @@ def _print_Module(self, expr):
 
         # Print imports last to be sure that all additional_imports have been collected
         imports = [Import(expr.name, Module(expr.name,(),())), *self._additional_imports.values()]
-        c_headers_imports = ''
-        local_imports = ''
-
-        for imp in imports:
-            if imp.source in c_library_headers:
-                c_headers_imports += self._print(imp)
-            else:
-                local_imports += self._print(imp)
-
-        imports = f'{c_headers_imports}\
-                    extern "C"{{\n\
-                    {local_imports}\
-                    }}'
+        imports = ''.join(self._print(i) for i in imports)
 
         code = f'{imports}\n\
                  {global_variables}\n\
@@ -72,3 +60,34 @@ def _print_Module(self, expr):
 
         self.exit_scope()
         return code
+
+    def _print_ModuleHeader(self, expr):
+        """Print the module header: an include guard around extern globals and function prototypes."""
+        self.set_scope(expr.module.scope)
+        self._in_header = True
+        name = expr.module.name
+        funcs = ""
+        cuda_headers = ""
+        for f in expr.module.funcs:
+            if not f.is_inline:
+                if 'kernel' in f.decorators:  # Kernel prototypes are emitted outside the extern "C" block
+                    cuda_headers += self.function_signature(f) + ';\n'
+                else:
+                    funcs += self.function_signature(f) + ';\n'
+        global_variables = ''.join('extern '+self._print(d) for d in expr.module.declarations if not d.variable.is_private)
+        # Print imports last to be sure that all additional_imports have been collected
+        imports = [*expr.module.imports, *self._additional_imports.values()]
+        imports = ''.join(self._print(i) for i in imports)
+        # NOTE(review): `imports` is not part of the returned header text; confirm this is intended
+        self._in_header = False
+        self.exit_scope()
+        function_declaration = f'{cuda_headers}\n\
+                    extern "C"{{\n\
+                    {funcs}\
+                    }}\n'
+        return '\n'.join((f"#ifndef {name.upper()}_H",
+                          f"#define {name.upper()}_H",
+                          global_variables,
+                          function_declaration,
+                          f"#endif // {name.upper()}_H\n"))
+
diff --git a/pyccel/codegen/python_wrapper.py b/pyccel/codegen/python_wrapper.py
index 9437727042..62c303fa64 100644
--- a/pyccel/codegen/python_wrapper.py
+++ b/pyccel/codegen/python_wrapper.py
@@ -13,6 +13,7 @@
 from pyccel.codegen.printing.fcode               import FCodePrinter
 from pyccel.codegen.wrapper.fortran_to_c_wrapper import FortranToCWrapper
 from pyccel.codegen.wrapper.c_to_python_wrapper  import CToPythonWrapper
+from pyccel.codegen.wrapper.cuda_to_c_wrapper    import CudaToCWrapper
 from pyccel.codegen.utilities                    import recompile_object
 from pyccel.codegen.utilities                    import copy_internal_library
 from pyccel.codegen.utilities                    import internal_libs
@@ -144,6 +145,9 @@ def create_shared_library(codegen,
                 verbose=verbose)
         timings['Bind C wrapping'] = time.time() - start_bind_c_compiling
         c_ast = bind_c_mod
+    elif language == 'cuda':
+        wrapper = CudaToCWrapper()
+        c_ast = wrapper.wrap(codegen.ast)
     else:
         c_ast = codegen.ast
 
diff --git a/pyccel/codegen/wrapper/cuda_to_c_wrapper.py b/pyccel/codegen/wrapper/cuda_to_c_wrapper.py
new file mode 100644
index 0000000000..c0e24c7c09
--- /dev/null
+++ b/pyccel/codegen/wrapper/cuda_to_c_wrapper.py
@@ -0,0 +1,78 @@
+# coding: utf-8
+#------------------------------------------------------------------------------------------#
+# This file is part of Pyccel which is released under MIT License. See the LICENSE file or #
+# go to https://github.com/pyccel/pyccel/blob/master/LICENSE for full license details.     #
+#------------------------------------------------------------------------------------------#
+"""
+Module describing the code-wrapping class : CudaToCWrapper
+which creates an interface exposing Cuda code to C.
+"""
+
+from pyccel.ast.bind_c      import BindCModule
+from pyccel.errors.errors   import Errors
+from pyccel.ast.bind_c      import BindCVariable
+from .wrapper               import Wrapper
+
+errors = Errors()
+
+class CudaToCWrapper(Wrapper):
+    """
+    Class for creating a wrapper exposing Cuda code to C.
+
+    While CUDA is typically compatible with C by default,
+    this wrapper becomes necessary in scenarios where specific adaptations
+    or modifications are required to ensure seamless integration with C.
+    """
+
+    def _wrap_Module(self, expr):
+        """
+        Create a Module which is compatible with C.
+
+        Create a Module which provides an interface between C and the
+        Module described by expr.
+
+        Parameters
+        ----------
+        expr : pyccel.ast.core.Module
+            The module to be wrapped.
+
+        Returns
+        -------
+        pyccel.ast.bind_c.BindCModule
+            The C-compatible module.
+        """
+        init_func = expr.init_func
+        if expr.interfaces:
+            errors.report("Interface wrapping is not yet supported for Cuda",
+                      severity='warning', symbol=expr)
+        if expr.classes:
+            errors.report("Class wrapping is not yet supported for Cuda",
+                      severity='warning', symbol=expr)
+
+        variables = [self._wrap(v) for v in expr.variables]
+
+        return BindCModule(expr.name, variables, expr.funcs,
+                init_func=init_func,
+                scope = expr.scope,
+                original_module=expr)
+
+    def _wrap_Variable(self, expr):
+        """
+        Create all objects necessary to expose a module variable to C.
+
+        Create and return the objects which must be printed in the wrapping
+        module in order to expose the variable to C
+
+        Parameters
+        ----------
+        expr : pyccel.ast.variables.Variable
+            The module variable.
+
+        Returns
+        -------
+        pyccel.ast.bind_c.BindCVariable
+            The C-compatible variable, which must be printed in
+            the wrapping module to expose the variable.
+        """
+        return expr.clone(expr.name, new_class = BindCVariable)
+
diff --git a/tests/epyccel/modules/cuda_module.py b/tests/epyccel/modules/cuda_module.py
new file mode 100644
index 0000000000..bb7ae6b98a
--- /dev/null
+++ b/tests/epyccel/modules/cuda_module.py
@@ -0,0 +1,13 @@
+# pylint: disable=missing-function-docstring, missing-module-docstring
+import numpy as np
+
+g = np.float64(9.81)
+r0 = np.float32(1.0)
+rmin = 0.01
+rmax = 1.0
+
+skip_centre = True
+
+method = 3
+
+tiny = np.int32(4)
diff --git a/tests/epyccel/test_epyccel_modules.py b/tests/epyccel/test_epyccel_modules.py
index ad8ae0bd75..223f741bf0 100644
--- a/tests/epyccel/test_epyccel_modules.py
+++ b/tests/epyccel/test_epyccel_modules.py
@@ -200,3 +200,16 @@ def test_awkward_names(language):
     assert mod.function() == modnew.function()
     assert mod.pure() == modnew.pure()
     assert mod.allocate(1) == modnew.allocate(1)
+
+def test_cuda_module(language_with_cuda):
+    import modules.cuda_module as mod
+
+    modnew = epyccel(mod, language=language_with_cuda)
+
+    atts = ('g', 'r0', 'rmin', 'rmax', 'skip_centre',
+            'method', 'tiny')
+    for att in atts:
+        mod_att = getattr(mod, att)
+        modnew_att = getattr(modnew, att)
+        assert mod_att == modnew_att
+        assert type(mod_att) is type(modnew_att)

From b3de5498fa8ceb3880713a75f293a4fd26ae7aa3 Mon Sep 17 00:00:00 2001
From: Said Mazouz <95222894+smazouz42@users.noreply.github.com>
Date: Thu, 27 Jun 2024 20:31:46 +0100
Subject: [PATCH 128/130] Add support for kernels (#42)

This pull request addresses issue #28 by implementing a new feature in
Pyccel that allows users to define custom GPU kernels. The syntax for
creating these kernels is inspired by Numba. It also fixes issue #45,
which was required for testing purposes.

**Commit Summary**

- Introduced KernelCall class
- Added cuda printer methods _print_KernelCall and _print_FunctionDef to
generate the corresponding CUDA representation for both kernel calls and
definitions
- Added IndexedFunctionCall, which represents an indexed function call
- Added CUDA module and cuda.synchronize()
- Fixed a bug found in the header printer: it did not import the
necessary headers for the functions used

---------

Co-authored-by: EmilyBourne <louise.bourne@gmail.com>
Co-authored-by: bauom <40796259+bauom@users.noreply.github.com>
Co-authored-by: Emily Bourne <emily.bourne@epfl.ch>
---
 .dict_custom.txt                              |   1 +
 CHANGELOG.md                                  |   2 +
 docs/cuda.md                                  |  23 +++
 pyccel/ast/core.py                            |  37 ++++
 pyccel/ast/cuda.py                            |  65 +++++++
 pyccel/ast/cudaext.py                         |  42 +++++
 pyccel/ast/utilities.py                       |   4 +-
 pyccel/codegen/printing/cucode.py             |  46 ++++-
 pyccel/cuda/__init__.py                       |  10 +
 pyccel/cuda/cuda_sync_primitives.py           |  16 ++
 pyccel/decorators.py                          |  32 ++++
 pyccel/errors/messages.py                     |   8 +
 pyccel/parser/semantic.py                     |  84 ++++++++-
 pyccel/parser/syntactic.py                    |   4 +
 tests/conftest.py                             |   9 +
 tests/cuda/test_kernel_semantic.py            | 176 ++++++++++++++++++
 tests/pyccel/scripts/kernel/hello_kernel.py   |  19 ++
 .../scripts/kernel/kernel_name_collision.py   |   8 +
 tests/pyccel/test_pyccel.py                   |  22 ++-
 19 files changed, 599 insertions(+), 9 deletions(-)
 create mode 100644 docs/cuda.md
 create mode 100644 pyccel/ast/cuda.py
 create mode 100644 pyccel/ast/cudaext.py
 create mode 100644 pyccel/cuda/__init__.py
 create mode 100644 pyccel/cuda/cuda_sync_primitives.py
 create mode 100644 tests/cuda/test_kernel_semantic.py
 create mode 100644 tests/pyccel/scripts/kernel/hello_kernel.py
 create mode 100644 tests/pyccel/scripts/kernel/kernel_name_collision.py

diff --git a/.dict_custom.txt b/.dict_custom.txt
index 161337d33b..6ddf80b1ff 100644
--- a/.dict_custom.txt
+++ b/.dict_custom.txt
@@ -120,3 +120,4 @@ indexable
 traceback
 STC
 gFTL
+GPUs
diff --git a/CHANGELOG.md b/CHANGELOG.md
index ac1c913a26..1c23db4e01 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,8 @@ All notable changes to this project will be documented in this file.
 
 -   #32 : Add support for `nvcc` Compiler and `cuda` language as a possible option.
 -   #48 : Fix incorrect handling of imports in `cuda`.
+-   #42 : Add support for custom kernels in `cuda`.
+-   #42 : Add Cuda module to Pyccel. Add support for `cuda.synchronize` function.
 
 ## \[UNRELEASED\]
 
diff --git a/docs/cuda.md b/docs/cuda.md
new file mode 100644
index 0000000000..de30d52b80
--- /dev/null
+++ b/docs/cuda.md
@@ -0,0 +1,23 @@
+# Getting started with GPUs
+
+Pyccel now supports NVIDIA CUDA, empowering users to accelerate numerical computations on GPUs seamlessly. With Pyccel's high-level syntax and automatic code generation, harnessing the power of CUDA becomes effortless. This documentation provides a quick guide to enabling CUDA in Pyccel.
+
+## Cuda Decorator
+
+### kernel
+
+The kernel decorator allows the user to declare a CUDA kernel. The kernel can be defined in Python, and the syntax is similar to that of Numba.
+
+```python
+from pyccel.decorators import kernel
+
+@kernel
+def my_kernel():
+    pass
+
+blockspergrid = 1
+threadsperblock = 1
+# Call your kernel function
+my_kernel[blockspergrid, threadsperblock]()
+
+```
\ No newline at end of file
diff --git a/pyccel/ast/core.py b/pyccel/ast/core.py
index 8981ddc160..2758b75be2 100644
--- a/pyccel/ast/core.py
+++ b/pyccel/ast/core.py
@@ -73,6 +73,7 @@
     'If',
     'IfSection',
     'Import',
+    'IndexedFunctionCall',
     'InProgram',
     'InlineFunctionDef',
     'Interface',
@@ -2065,6 +2066,42 @@ def _ignore(cls, c):
         """
         return c is None or isinstance(c, (FunctionDef, *cls._ignored_types))
 
+class IndexedFunctionCall(FunctionCall):
+    """
+    Represents an indexed function call in the code.
+
+    Class representing indexed function calls, encapsulating all
+    relevant information for such calls within the code base.
+
+    Parameters
+    ----------
+    func : FunctionDef
+        The function being called.
+
+    args : iterable of FunctionCallArgument
+        The arguments passed to the function.
+
+    indexes : iterable of TypedAstNode
+        The indexes of the function call.
+
+    current_function : FunctionDef, optional
+        The function where the call takes place.
+    """
+    __slots__ = ('_indexes',)
+    _attribute_nodes = FunctionCall._attribute_nodes + ('_indexes',)
+    def __init__(self, func, args, indexes, current_function = None):
+        self._indexes = indexes
+        super().__init__(func, args, current_function)
+
+    @property
+    def indexes(self):
+        """
+        Indexes of function call.
+
+        Represents the indexes of the function call
+        """
+        return self._indexes
+
 class ConstructorCall(FunctionCall):
 
     """
diff --git a/pyccel/ast/cuda.py b/pyccel/ast/cuda.py
new file mode 100644
index 0000000000..f1e50ef7f0
--- /dev/null
+++ b/pyccel/ast/cuda.py
@@ -0,0 +1,65 @@
+# -*- coding: utf-8 -*-
+#------------------------------------------------------------------------------------------#
+# This file is part of Pyccel which is released under MIT License. See the LICENSE file or #
+# go to https://github.com/pyccel/pyccel/blob/master/LICENSE for full license details.     #
+#------------------------------------------------------------------------------------------#
+"""
+CUDA Module
+This module provides a collection of classes and utilities for CUDA programming.
+"""
+from pyccel.ast.core import FunctionCall
+
+__all__ = (
+    'KernelCall',
+)
+
+class KernelCall(FunctionCall):
+    """
+    Represents a kernel function call in the code.
+
+    The class serves as a representation of a kernel
+    function call within the codebase.
+
+    Parameters
+    ----------
+    func : FunctionDef
+        The definition of the function being called.
+
+    args : iterable of FunctionCallArgument
+        The arguments passed to the function.
+
+    num_blocks : TypedAstNode
+        The number of blocks. These objects must have a primitive type of `PrimitiveIntegerType`.
+
+    tp_block : TypedAstNode
+        The number of threads per block. These objects must have a primitive type of `PrimitiveIntegerType`.
+
+    current_function : FunctionDef, optional
+        The function where the call takes place.
+    """
+    __slots__ = ('_num_blocks','_tp_block')
+    _attribute_nodes = (*FunctionCall._attribute_nodes, '_num_blocks', '_tp_block')
+
+    def __init__(self, func, args, num_blocks, tp_block, current_function = None):
+        self._num_blocks = num_blocks
+        self._tp_block = tp_block
+        super().__init__(func, args, current_function)
+
+    @property
+    def num_blocks(self):
+        """
+        The number of blocks in the kernel being called.
+
+        The number of blocks in the kernel being called.
+        """
+        return self._num_blocks
+
+    @property
+    def tp_block(self):
+        """
+        The number of threads per block.
+
+        The number of threads per block.
+        """
+        return self._tp_block
+
diff --git a/pyccel/ast/cudaext.py b/pyccel/ast/cudaext.py
new file mode 100644
index 0000000000..b540f20993
--- /dev/null
+++ b/pyccel/ast/cudaext.py
@@ -0,0 +1,42 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+#------------------------------------------------------------------------------------------#
+# This file is part of Pyccel which is released under MIT License. See the LICENSE file or #
+# go to https://github.com/pyccel/pyccel/blob/master/LICENSE for full license details.     #
+#------------------------------------------------------------------------------------------#
+"""
+CUDA Extension Module
+Provides CUDA functionality for code generation.
+"""
+from .internals      import PyccelFunction
+
+from .datatypes      import VoidType
+from .core           import Module, PyccelFunctionDef
+
+__all__ = (
+    'CudaSynchronize',
+)
+
+class CudaSynchronize(PyccelFunction):
+    """
+    Represents a call to Cuda.synchronize for code generation.
+
+    This class serves as a representation of the Cuda.synchronize method.
+    """
+    __slots__ = ()
+    _attribute_nodes = ()
+    _shape     = None
+    _class_type = VoidType()
+    def __init__(self):
+        super().__init__()
+
+cuda_funcs = {
+    'synchronize'       : PyccelFunctionDef('synchronize' , CudaSynchronize),
+}
+
+cuda_mod = Module('cuda',
+    variables=[],
+    funcs=cuda_funcs.values(),
+    imports=[]
+)
+
diff --git a/pyccel/ast/utilities.py b/pyccel/ast/utilities.py
index 1e6c0422ab..e5cd77b168 100644
--- a/pyccel/ast/utilities.py
+++ b/pyccel/ast/utilities.py
@@ -25,6 +25,7 @@
 from .literals      import LiteralInteger, LiteralEllipsis, Nil
 from .mathext       import math_mod
 from .sysext        import sys_mod
+from .cudaext       import cuda_mod
 
 from .numpyext      import (NumpyEmpty, NumpyArray, numpy_mod,
                             NumpyTranspose, NumpyLinspace)
@@ -49,7 +50,8 @@
 decorators_mod = Module('decorators',(),
         funcs = [PyccelFunctionDef(d, PyccelFunction) for d in pyccel_decorators.__all__])
 pyccel_mod = Module('pyccel',(),(),
-        imports = [Import('decorators', decorators_mod)])
+        imports = [Import('decorators', decorators_mod),
+                    Import('cuda', cuda_mod)])
 
 # TODO add documentation
 builtin_import_registry = Module('__main__',
diff --git a/pyccel/codegen/printing/cucode.py b/pyccel/codegen/printing/cucode.py
index 277d2a3a6a..cd26843017 100644
--- a/pyccel/codegen/printing/cucode.py
+++ b/pyccel/codegen/printing/cucode.py
@@ -9,11 +9,12 @@
 enabling the direct translation of high-level Pyccel expressions into CUDA code.
 """
 
-from pyccel.codegen.printing.ccode import CCodePrinter, c_library_headers
+from pyccel.codegen.printing.ccode  import CCodePrinter
 
-from pyccel.ast.core        import Import, Module
+from pyccel.ast.core                import Import, Module
+from pyccel.ast.literals            import Nil
 
-from pyccel.errors.errors   import Errors
+from pyccel.errors.errors           import Errors
 
 
 errors = Errors()
@@ -61,6 +62,44 @@ def _print_Module(self, expr):
         self.exit_scope()
         return code
 
+    def function_signature(self, expr, print_arg_names = True):
+        """
+        Get the Cuda representation of the function signature.
+
+        Extract from the function definition `expr` all the
+        information (name, input, output) needed to create the
+        function signature and return a string describing the
+        function.
+        This is not a declaration as the signature does not end
+        with a semi-colon.
+
+        Parameters
+        ----------
+        expr : FunctionDef
+            The function definition for which a signature is needed.
+
+        print_arg_names : bool, default : True
+            Indicates whether argument names should be printed.
+
+        Returns
+        -------
+        str
+            Signature of the function.
+        """
+        cuda_decorater = '__global__' if 'kernel' in expr.decorators else ''
+        c_function_signature = super().function_signature(expr, print_arg_names)
+        return f'{cuda_decorater} {c_function_signature}'
+
+    def _print_KernelCall(self, expr):
+        func = expr.funcdef
+        args = [a.value or Nil() for a in expr.args]
+
+        args = ', '.join(self._print(a) for a in args)
+        return f"{func.name}<<<{expr.num_blocks}, {expr.tp_block}>>>({args});\n"
+
+    def _print_CudaSynchronize(self, expr):
+        return 'cudaDeviceSynchronize();\n'
+
     def _print_ModuleHeader(self, expr):
         self.set_scope(expr.module.scope)
         self._in_header = True
@@ -87,6 +126,7 @@ def _print_ModuleHeader(self, expr):
                     }}\n'
         return '\n'.join((f"#ifndef {name.upper()}_H",
                           f"#define {name.upper()}_H",
+                          imports,
                           global_variables,
                           function_declaration,
                           "#endif // {name.upper()}_H\n"))
diff --git a/pyccel/cuda/__init__.py b/pyccel/cuda/__init__.py
new file mode 100644
index 0000000000..e8542ad5d5
--- /dev/null
+++ b/pyccel/cuda/__init__.py
@@ -0,0 +1,10 @@
+#------------------------------------------------------------------------------------------#
+# This file is part of Pyccel which is released under MIT License. See the LICENSE file or #
+# go to https://github.com/pyccel/pyccel/blob/master/LICENSE for full license details.     #
+#------------------------------------------------------------------------------------------#
+"""
+    This module is for exposing the CudaSubmodule functions.
+"""
+from .cuda_sync_primitives    import synchronize
+
+__all__ = ['synchronize']
diff --git a/pyccel/cuda/cuda_sync_primitives.py b/pyccel/cuda/cuda_sync_primitives.py
new file mode 100644
index 0000000000..f3442fe9e2
--- /dev/null
+++ b/pyccel/cuda/cuda_sync_primitives.py
@@ -0,0 +1,16 @@
+#------------------------------------------------------------------------------------------#
+# This file is part of Pyccel which is released under MIT License. See the LICENSE file or #
+# go to https://github.com/pyccel/pyccel/blob/master/LICENSE for full license details.     #
+#------------------------------------------------------------------------------------------#
+"""
+This submodule contains CUDA methods for Pyccel.
+"""
+
+
+def synchronize():
+    """
+    Synchronize CUDA device execution.
+
+    Synchronize CUDA device execution.
+    """
+
diff --git a/pyccel/decorators.py b/pyccel/decorators.py
index 1f640043db..77717a991f 100644
--- a/pyccel/decorators.py
+++ b/pyccel/decorators.py
@@ -19,6 +19,7 @@
     'sympy',
     'template',
     'types',
+    'kernel'
 )
 
 
@@ -109,3 +110,34 @@ def allow_negative_index(f,*args):
     def identity(f):
         return f
     return identity
+
+def kernel(f):
+    """
+    Decorator for marking a Python function as a kernel.
+
+    This function serves as a decorator to mark a Python function
+    as a kernel function, typically used for GPU computations.
+    This allows the function to be indexed with the number of blocks and threads.
+
+    Parameters
+    ----------
+    f : function
+        The function to which the decorator is applied.
+
+    Returns
+    -------
+    KernelAccessor
+        A class representing the kernel function.
+    """
+    class KernelAccessor:
+        """
+        Class representing the kernel function.
+
+        Class representing the kernel function.
+        """
+        def __init__(self, f):
+            self._f = f
+        def __getitem__(self, args):
+            return self._f
+
+    return KernelAccessor(f)
diff --git a/pyccel/errors/messages.py b/pyccel/errors/messages.py
index 79eccc1df2..09966d810c 100644
--- a/pyccel/errors/messages.py
+++ b/pyccel/errors/messages.py
@@ -162,3 +162,11 @@
 WRONG_LINSPACE_ENDPOINT = 'endpoint argument must be boolean'
 NON_LITERAL_KEEP_DIMS = 'keep_dims argument must be a literal, otherwise rank is unknown'
 NON_LITERAL_AXIS = 'axis argument must be a literal, otherwise pyccel cannot determine which dimension to operate on'
+MISSING_KERNEL_CONFIGURATION = 'Kernel launch configuration not specified'
+INVALID_KERNEL_LAUNCH_CONFIG = 'Expected exactly 2 parameters for kernel launch'
+INVALID_KERNEL_CALL_BP_GRID = 'Invalid Block per grid parameter for Kernel call'
+INVALID_KERNEL_CALL_TP_BLOCK = 'Invalid Thread per Block parameter for Kernel call'
+
+
+
+
diff --git a/pyccel/parser/semantic.py b/pyccel/parser/semantic.py
index f6e9f34f39..29e851b20c 100644
--- a/pyccel/parser/semantic.py
+++ b/pyccel/parser/semantic.py
@@ -116,6 +116,8 @@
 from pyccel.ast.variable import IndexedElement, AnnotatedPyccelSymbol
 from pyccel.ast.variable import DottedName, DottedVariable
 
+from pyccel.ast.cuda import     KernelCall
+
 from pyccel.errors.errors import Errors
 from pyccel.errors.errors import PyccelSemanticError
 
@@ -133,7 +135,9 @@
         PYCCEL_RESTRICTION_LIST_COMPREHENSION_LIMITS, PYCCEL_RESTRICTION_LIST_COMPREHENSION_SIZE,
         UNUSED_DECORATORS, UNSUPPORTED_POINTER_RETURN_VALUE, PYCCEL_RESTRICTION_OPTIONAL_NONE,
         PYCCEL_RESTRICTION_PRIMITIVE_IMMUTABLE, PYCCEL_RESTRICTION_IS_ISNOT,
-        FOUND_DUPLICATED_IMPORT, UNDEFINED_WITH_ACCESS, MACRO_MISSING_HEADER_OR_FUNC)
+        FOUND_DUPLICATED_IMPORT, UNDEFINED_WITH_ACCESS, MACRO_MISSING_HEADER_OR_FUNC, PYCCEL_RESTRICTION_INHOMOG_SET,
+        MISSING_KERNEL_CONFIGURATION,
+        INVALID_KERNEL_LAUNCH_CONFIG, INVALID_KERNEL_CALL_BP_GRID, INVALID_KERNEL_CALL_TP_BLOCK)
 
 from pyccel.parser.base      import BasicParser
 from pyccel.parser.syntactic import SyntaxParser
@@ -1139,6 +1143,67 @@ def _handle_function(self, expr, func, args, *, is_method = False, use_build_fun
 
             return new_expr
 
+    def _handle_kernel(self, expr, func, args):
+        """
+        Create the node representing the kernel function call.
+
+        Check the launch configuration, the results and the arguments of
+        the indexed call, then create a KernelCall from this information.
+
+        Parameters
+        ----------
+        expr : IndexedFunctionCall
+               Node has all the information about the function call.
+
+        func : FunctionDef | Interface | PyccelInternalFunction type
+               The function being called.
+
+        args : iterable of FunctionCallArgument
+               The arguments passed to the function.
+
+        Returns
+        -------
+        pyccel.ast.cuda.KernelCall
+            The semantic representation of the kernel call.
+        """
+        if len(expr.indexes) != 2:
+            errors.report(INVALID_KERNEL_LAUNCH_CONFIG,
+                    symbol=expr,
+                    severity='fatal')
+        if len(func.results):
+            errors.report(f"cuda kernel function '{func.name}' returned a value in violation of the laid-down specification",
+                         symbol=expr,
+                         severity='fatal')
+        if isinstance(func, FunctionDef) and len(args) != len(func.arguments):
+            errors.report(f"{len(args)} argument types given, but function takes {len(func.arguments)} arguments",
+                symbol=expr,
+                severity='fatal')
+        if not isinstance(expr.indexes[0], (LiteralInteger)):
+            if isinstance(expr.indexes[0], PyccelSymbol):
+                num_blocks = self.get_variable(expr.indexes[0])
+
+                if not isinstance(num_blocks.dtype, PythonNativeInt):
+                    errors.report(INVALID_KERNEL_CALL_BP_GRID,
+                    symbol = expr,
+                    severity='fatal')
+            else:
+                errors.report(INVALID_KERNEL_CALL_BP_GRID,
+                    symbol = expr,
+                    severity='fatal')
+        if not isinstance(expr.indexes[1], (LiteralInteger)):
+            if isinstance(expr.indexes[1], PyccelSymbol):
+                tp_block = self.get_variable(expr.indexes[1])
+                if not isinstance(tp_block.dtype, PythonNativeInt):
+                    errors.report(INVALID_KERNEL_CALL_TP_BLOCK,
+                    symbol = expr,
+                    severity='fatal')
+            else:
+                errors.report(INVALID_KERNEL_CALL_TP_BLOCK,
+                    symbol = expr,
+                    severity='fatal')
+        new_expr = KernelCall(func, args, expr.indexes[0], expr.indexes[1])
+        return new_expr
+
     def _sort_function_call_args(self, func_args, args):
         """
         Sort and add the missing call arguments to match the arguments in the function definition.
@@ -2852,6 +2917,23 @@ def _visit_Lambda(self, expr):
                 expr = Lambda(tuple(expr.variables), expr_new)
         return expr
 
+    def _visit_IndexedFunctionCall(self, expr):
+        name     = expr.funcdef
+        name = self.scope.get_expected_name(name)
+        func     = self.scope.find(name, 'functions')
+        args = self._handle_function_args(expr.args)
+
+        if func is None:
+            return errors.report(UNDEFINED_FUNCTION, symbol=expr.funcdef,
+                    bounding_box=(self.current_ast_node.lineno, self.current_ast_node.col_offset),
+                    severity='fatal')
+
+        func = self._annotate_the_called_function_def(func)
+        if 'kernel' in func.decorators :
+            return self._handle_kernel(expr, func, args)
+        else:
+            return errors.report("Unknown function type",
+                symbol=expr, severity='fatal')
     def _visit_FunctionCall(self, expr):
         name     = expr.funcdef
         try:
diff --git a/pyccel/parser/syntactic.py b/pyccel/parser/syntactic.py
index 318b765703..0cfe895605 100644
--- a/pyccel/parser/syntactic.py
+++ b/pyccel/parser/syntactic.py
@@ -64,6 +64,8 @@
 
 from pyccel.ast.type_annotations import SyntacticTypeAnnotation, UnionTypeAnnotation
 
+from pyccel.ast.core import IndexedFunctionCall
+
 from pyccel.parser.base        import BasicParser
 from pyccel.parser.extend_tree import extend_tree
 from pyccel.parser.utilities   import get_default_path
@@ -1101,6 +1103,8 @@ def _visit_Call(self, stmt):
         elif isinstance(func, DottedName):
             func_attr = FunctionCall(func.name[-1], args)
             func = DottedName(*func.name[:-1], func_attr)
+        elif isinstance(func,IndexedElement):
+            func = IndexedFunctionCall(func.base, args, func.indices)
         else:
             raise NotImplementedError(f' Unknown function type {type(func)}')
 
diff --git a/tests/conftest.py b/tests/conftest.py
index a5082ef6e8..4e74d1ec7a 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -59,6 +59,15 @@ def pytest_runtest_teardown(item, nextitem):
 
 def pytest_addoption(parser):
     parser.addoption("--developer-mode", action="store_true", default=github_debugging, help="Show tracebacks when pyccel errors are raised")
+    parser.addoption("--gpu_available", action="store_true",
+                default=False, help="enable GPU tests")
+
+def pytest_generate_tests(metafunc):
+    if "gpu_available" in metafunc.fixturenames:
+        if metafunc.config.getoption("gpu_available"):
+            metafunc.parametrize("gpu_available", [True])
+        else:
+            metafunc.parametrize("gpu_available", [False])
 
 def pytest_sessionstart(session):
     # setup_stuff
diff --git a/tests/cuda/test_kernel_semantic.py b/tests/cuda/test_kernel_semantic.py
new file mode 100644
index 0000000000..00b74c3bea
--- /dev/null
+++ b/tests/cuda/test_kernel_semantic.py
@@ -0,0 +1,176 @@
+# pylint: disable=missing-function-docstring, missing-module-docstring
+import pytest
+
+from pyccel import epyccel
+from pyccel.decorators import kernel
+from pyccel.errors.errors import Errors, PyccelSemanticError
+from pyccel.errors.messages import (INVALID_KERNEL_CALL_TP_BLOCK,
+                                    INVALID_KERNEL_CALL_BP_GRID,
+                                    INVALID_KERNEL_LAUNCH_CONFIG)
+
+
+@pytest.mark.cuda
+def test_invalid_block_number():
+    def invalid_block_number():
+        @kernel
+        def kernel_call():
+            pass
+
+        blocks_per_grid = 1.0
+        threads_per_block = 1
+        kernel_call[blocks_per_grid, threads_per_block]()
+
+    errors = Errors()
+
+    with pytest.raises(PyccelSemanticError):
+        epyccel(invalid_block_number, language="cuda")
+
+    assert errors.has_errors()
+
+    assert errors.num_messages() == 1
+
+    error_info = [*errors.error_info_map.values()][0][0]
+    assert error_info.symbol.funcdef == 'kernel_call'
+    assert INVALID_KERNEL_CALL_BP_GRID == error_info.message
+
+
+@pytest.mark.cuda
+def test_invalid_thread_per_block():
+    def invalid_thread_per_block():
+        @kernel
+        def kernel_call():
+            pass
+
+        blocks_per_grid = 1
+        threads_per_block = 1.0
+        kernel_call[blocks_per_grid, threads_per_block]()
+
+    errors = Errors()
+
+    with pytest.raises(PyccelSemanticError):
+        epyccel(invalid_thread_per_block, language="cuda")
+    assert errors.has_errors()
+    assert errors.num_messages() == 1
+    error_info = [*errors.error_info_map.values()][0][0]
+    assert error_info.symbol.funcdef == 'kernel_call'
+    assert INVALID_KERNEL_CALL_TP_BLOCK == error_info.message
+
+
+@pytest.mark.cuda
+def test_invalid_launch_config_high():
+    def invalid_launch_config_high():
+        @kernel
+        def kernel_call():
+            pass
+
+        blocks_per_grid = 1
+        threads_per_block = 1
+        third_param = 1
+        kernel_call[blocks_per_grid, threads_per_block, third_param]()
+
+    errors = Errors()
+
+    with pytest.raises(PyccelSemanticError):
+        epyccel(invalid_launch_config_high, language="cuda")
+
+    assert errors.has_errors()
+    assert errors.num_messages() == 1
+
+    error_info = [*errors.error_info_map.values()][0][0]
+    assert error_info.symbol.funcdef == 'kernel_call'
+    assert INVALID_KERNEL_LAUNCH_CONFIG == error_info.message
+
+
+@pytest.mark.cuda
+def test_invalid_launch_config_low():
+    def invalid_launch_config_low():
+        @kernel
+        def kernel_call():
+            pass
+
+        blocks_per_grid = 1
+        kernel_call[blocks_per_grid]()
+
+    errors = Errors()
+
+    with pytest.raises(PyccelSemanticError):
+        epyccel(invalid_launch_config_low, language="cuda")
+
+    assert errors.has_errors()
+    assert errors.num_messages() == 1
+
+    error_info = [*errors.error_info_map.values()][0][0]
+    assert error_info.symbol.funcdef == 'kernel_call'
+    assert INVALID_KERNEL_LAUNCH_CONFIG == error_info.message
+
+
+@pytest.mark.cuda
+def test_invalid_arguments_for_kernel_call():
+    def invalid_arguments():
+        @kernel
+        def kernel_call(arg : int):
+            pass
+
+        blocks_per_grid = 1
+        threads_per_block = 1
+        kernel_call[blocks_per_grid, threads_per_block]()
+
+    errors = Errors()
+
+    with pytest.raises(PyccelSemanticError):
+        epyccel(invalid_arguments, language="cuda")
+
+    assert errors.has_errors()
+    assert errors.num_messages() == 1
+
+    error_info = [*errors.error_info_map.values()][0][0]
+    assert error_info.symbol.funcdef == 'kernel_call'
+    assert "0 argument types given, but function takes 1 arguments" == error_info.message
+
+
+@pytest.mark.cuda
+def test_invalid_arguments_for_kernel_call_2():
+    def invalid_arguments_():
+        @kernel
+        def kernel_call():
+            pass
+
+        blocks_per_grid = 1
+        threads_per_block = 1
+        kernel_call[blocks_per_grid, threads_per_block](1)
+
+    errors = Errors()
+
+    with pytest.raises(PyccelSemanticError):
+        epyccel(invalid_arguments_, language="cuda")
+
+    assert errors.has_errors()
+    assert errors.num_messages() == 1
+
+    error_info = [*errors.error_info_map.values()][0][0]
+    assert error_info.symbol.funcdef == 'kernel_call'
+    assert "1 argument types given, but function takes 0 arguments" == error_info.message
+
+
+@pytest.mark.cuda
+def test_kernel_return():
+    def kernel_return():
+        @kernel
+        def kernel_call():
+            return 7
+
+        blocks_per_grid = 1
+        threads_per_block = 1
+        kernel_call[blocks_per_grid, threads_per_block](1)
+
+    errors = Errors()
+
+    with pytest.raises(PyccelSemanticError):
+        epyccel(kernel_return, language="cuda")
+
+    assert errors.has_errors()
+    assert errors.num_messages() == 1
+
+    error_info = [*errors.error_info_map.values()][0][0]
+    assert error_info.symbol.funcdef == 'kernel_call'
+    assert "cuda kernel function 'kernel_call' returned a value in violation of the laid-down specification" == error_info.message
diff --git a/tests/pyccel/scripts/kernel/hello_kernel.py b/tests/pyccel/scripts/kernel/hello_kernel.py
new file mode 100644
index 0000000000..b6901b25a1
--- /dev/null
+++ b/tests/pyccel/scripts/kernel/hello_kernel.py
@@ -0,0 +1,19 @@
+# pylint: disable=missing-function-docstring, missing-module-docstring
+from pyccel.decorators import kernel
+from pyccel import cuda
+
+@kernel
+def say_hello(its_morning : bool):
+    if(its_morning):
+        print("Hello and Good morning")
+    else:
+        print("Hello and Good afternoon")
+
+def f():
+    its_morning = True
+    say_hello[1,1](its_morning)
+    cuda.synchronize()
+
+if __name__ == '__main__':
+    f()
+
diff --git a/tests/pyccel/scripts/kernel/kernel_name_collision.py b/tests/pyccel/scripts/kernel/kernel_name_collision.py
new file mode 100644
index 0000000000..ac7abe25ae
--- /dev/null
+++ b/tests/pyccel/scripts/kernel/kernel_name_collision.py
@@ -0,0 +1,8 @@
+# pylint: disable=missing-function-docstring, missing-module-docstring
+from pyccel.decorators import kernel
+
+@kernel
+def do():
+    pass
+
+do[1,1]()
diff --git a/tests/pyccel/test_pyccel.py b/tests/pyccel/test_pyccel.py
index ec1e846549..b4757a3c31 100644
--- a/tests/pyccel/test_pyccel.py
+++ b/tests/pyccel/test_pyccel.py
@@ -294,7 +294,7 @@ def compare_pyth_fort_output( p_output, f_output, dtype=float, language=None):
 #------------------------------------------------------------------------------
 def pyccel_test(test_file, dependencies = None, compile_with_pyccel = True,
         cwd = None, pyccel_commands = "", output_dtype = float,
-        language = None, output_dir = None):
+        language = None, output_dir = None, execute_code = True):
     """
     Run pyccel and compare the output to ensure that the results
     are equivalent
@@ -394,13 +394,14 @@ def pyccel_test(test_file, dependencies = None, compile_with_pyccel = True,
             compile_fortran(cwd, output_test_file, dependencies)
         elif language == 'c':
             compile_c(cwd, output_test_file, dependencies)
-
-    lang_out = get_lang_output(output_test_file, language)
-    compare_pyth_fort_output(pyth_out, lang_out, output_dtype, language)
+    if  execute_code:
+        lang_out = get_lang_output(output_test_file, language)
+        compare_pyth_fort_output(pyth_out, lang_out, output_dtype, language)
 
 #==============================================================================
 # UNIT TESTS
 #==============================================================================
+
 def test_relative_imports_in_project(language):
 
     base_dir = os.path.dirname(os.path.realpath(__file__))
@@ -728,6 +729,19 @@ def test_multiple_results(language):
 def test_elemental(language):
     pyccel_test("scripts/decorators_elemental.py", language = language)
 
+#------------------------------------------------------------------------------
+@pytest.mark.cuda
+def test_hello_kernel(gpu_available):
+    # execute_code gates running the generated program and comparing its output.
+    pyccel_test("scripts/kernel/hello_kernel.py", language="cuda",
+            output_dtype=str, execute_code=gpu_available)
+
+#------------------------------------------------------------------------------
+@pytest.mark.cuda
+def test_kernel_collision(gpu_available):
+    pyccel_test("scripts/kernel/kernel_name_collision.py",
+            language="cuda", execute_code=gpu_available)
+
 #------------------------------------------------------------------------------
 def test_print_strings(language):
     types = str

From 4ac5182c14ed1a11c1b42cf457956ded9d5a2e27 Mon Sep 17 00:00:00 2001
From: Said Mazouz <95222894+smazouz42@users.noreply.github.com>
Date: Wed, 3 Jul 2024 17:37:02 +0100
Subject: [PATCH 129/130] Updated CUDA Name Clash Checker By Adding
 CUDA-specific keywords (#60)

This pull request addresses issue #59 by adding more CUDA-specific
keywords to the name clash checker, improving the checking of
variable/function names and preventing name clashes.

---------

Co-authored-by: EmilyBourne <louise.bourne@gmail.com>
Co-authored-by: bauom <40796259+bauom@users.noreply.github.com>
---
 CHANGELOG.md                              |  1 +
 pyccel/naming/cudanameclashchecker.py     | 36 ++++++++++++++++++++++-
 pyccel/naming/languagenameclashchecker.py |  5 ++++
 3 files changed, 41 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 1c23db4e01..2a00982705 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,7 @@ All notable changes to this project will be documented in this file.
 
 -   #32 : Add support for `nvcc` Compiler and `cuda` language as a possible option.
 -   #48 : Fix incorrect handling of imports in `cuda`.
+-   #59 : Updated `cuda` clash checker.
 -   #42 : Add support for custom kernel in`cuda`.
 -   #42 : Add Cuda module to Pyccel. Add support for `cuda.synchronize` function.
 
diff --git a/pyccel/naming/cudanameclashchecker.py b/pyccel/naming/cudanameclashchecker.py
index 971204e912..c7aaa4952f 100644
--- a/pyccel/naming/cudanameclashchecker.py
+++ b/pyccel/naming/cudanameclashchecker.py
@@ -16,6 +16,7 @@ class CudaNameClashChecker(LanguageNameClashChecker):
     verify that they do not cause name clashes. Name clashes may be due to
     new variables, or due to the use of reserved keywords.
     """
+
     # Keywords as mentioned on https://en.cppreference.com/w/c/keyword
     keywords = set(['isign', 'fsign', 'csign', 'auto', 'break', 'case', 'char', 'const',
         'continue', 'default', 'do', 'double', 'else', 'enum',
@@ -37,7 +38,40 @@ class CudaNameClashChecker(LanguageNameClashChecker):
         'GET_INDEX_FUNC_H2', 'GET_INDEX_FUNC', 'GET_INDEX',
         'INDEX', 'GET_ELEMENT', 'free_array', 'free_pointer',
         'get_index', 'numpy_to_ndarray_strides',
-        'numpy_to_ndarray_shape', 'get_size', 'order_f', 'order_c', 'array_copy_data'])
+        'numpy_to_ndarray_shape', 'get_size', 'order_f', 'order_c', 'array_copy_data',
+        '__global__', '__device__', '__host__', '__constant__', '__shared__',
+        '__managed__', 'threadIdx', 'blockIdx', 'blockDim', 'gridDim',
+        'warpSize', 'cudaMalloc', 'cudaFree', 'cudaMemcpy', 'cudaMemset',
+        'cudaMallocHost', 'cudaFreeHost', 'cudaMallocPitch',
+        'cudaMallocArray', 'cudaFreeArray', 'cudaHostAlloc',
+        'cudaHostRegister', 'cudaHostUnregister', 'cudaHostGetDevicePointer',
+        'cudaHostGetFlags', 'cudaDeviceSynchronize', 'cudaDeviceReset',
+        'cudaSetDevice', 'cudaGetDeviceCount', 'cudaGetDeviceProperties',
+        'cudaChooseDevice', 'cudaSetDeviceFlags', 'cudaGetDevice',
+        'cudaStreamCreate', 'cudaStreamDestroy', 'cudaStreamSynchronize',
+        'cudaStreamWaitEvent', 'cudaEventCreate', 'cudaEventDestroy', 'cudaEventRecord',
+        'cudaEventSynchronize', 'cudaEventElapsedTime', 'cuInit', 'cuDeviceGet',
+        'cuDeviceGetCount', 'cuDeviceGetName',
+        'cuDeviceComputeCapability', 'cuCtxCreate', 'cuCtxDestroy',
+        'cuCtxSynchronize', 'cuModuleLoad', 'cuModuleUnload',
+        'cuModuleGetFunction', 'cuModuleGetGlobal', 'cuModuleGetTexRef',
+        'cuMemAlloc', 'cuMemFree', 'cuMemcpyHtoD', 'cuMemcpyDtoH',
+        'cuMemcpyDtoD', 'cuMemcpyHtoDAsync', 'cuMemcpyDtoHAsync',
+        'cuMemcpyDtoDAsync', 'cuMemsetD8', 'cuMemsetD16', 'cuMemsetD32',
+        'cuMemsetD2D8', 'cuMemsetD2D16', 'cuMemsetD2D32', 'cuParamSetSize',
+        'cuParamSeti', 'cuParamSetf', 'cuParamSetv', 'cuLaunch', 'cuLaunchGrid',
+        'cuLaunchGridAsync', 'cuEventCreate', 'cuEventRecord', 'cuEventQuery',
+        'cuEventSynchronize', 'cuEventDestroy', 'cuEventElapsedTime',
+        'cuStreamCreate', 'cuStreamQuery', 'cuStreamSynchronize',
+        'cuStreamDestroy', 'cuFuncSetBlockShape', 'cuFuncSetSharedSize',
+        'cuFuncGetAttribute', 'cuTexRefCreate', 'cuTexRefDestroy',
+        'cuTexRefSetArray', 'cuTexRefSetAddress', 'cuTexRefSetAddress2D',
+        'cuTexRefSetFormat', 'cuTexRefSetAddressMode', 'cuTexRefSetFilterMode',
+        'cuTexRefSetFlags', 'cuTexRefGetAddress', 'cuTexRefGetArray',
+        'cuTexRefGetAddressMode', 'cuTexRefGetFilterMode', 'cuTexRefGetFormat',
+        'cuTexRefGetFlags', 'cuLaunchKernel', 'cuOccupancyMaxActiveBlocksPerMultiprocessor',
+        'cuOccupancyMaxPotentialBlockSize', 'cuOccupancyMaxPotentialBlockSizeWithFlags'
+    ])
 
     def has_clash(self, name, symbols):
         """
diff --git a/pyccel/naming/languagenameclashchecker.py b/pyccel/naming/languagenameclashchecker.py
index fa672a905b..d6415e6449 100644
--- a/pyccel/naming/languagenameclashchecker.py
+++ b/pyccel/naming/languagenameclashchecker.py
@@ -19,6 +19,11 @@ class LanguageNameClashChecker(metaclass = Singleton):
     """
     keywords = None
 
+    def __init__(self): #pylint: disable=useless-parent-delegation
+        # This __init__ function is required so the ArgumentSingleton can
+        # always detect a signature
+        super().__init__()
+
     def _get_collisionless_name(self, name, symbols):
         """
         Get a name which doesn't collision with keywords or symbols.

From 12d98b656db22e50866f32d59a59022bc1ca313d Mon Sep 17 00:00:00 2001
From: Said Mazouz <95222894+smazouz42@users.noreply.github.com>
Date: Wed, 3 Jul 2024 18:04:22 +0100
Subject: [PATCH 130/130] add handle for custom device (#61)

This pull request addresses issue
https://github.com/pyccel/pyccel-cuda/issues/41 by implementing a new
feature in Pyccel that allows users to define custom device functions.

**Commit Summary**

- Adding handler for custom device and its code generation.
- Adding test

---------

Co-authored-by: EmilyBourne <louise.bourne@gmail.com>
---
 CHANGELOG.md                               |  1 +
 docs/cuda.md                               | 25 ++++++++++++++++-
 pyccel/codegen/printing/cucode.py          |  7 ++---
 pyccel/decorators.py                       | 19 +++++++++++++
 pyccel/errors/messages.py                  |  2 +-
 pyccel/parser/semantic.py                  |  7 ++++-
 tests/cuda/test_device_semantic.py         | 31 ++++++++++++++++++++++
 tests/pyccel/scripts/kernel/device_test.py | 18 +++++++++++++
 tests/pyccel/test_pyccel.py                |  8 ++++++
 9 files changed, 112 insertions(+), 6 deletions(-)
 create mode 100644 tests/cuda/test_device_semantic.py
 create mode 100644 tests/pyccel/scripts/kernel/device_test.py

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 2a00982705..f88958cbf2 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -10,6 +10,7 @@ All notable changes to this project will be documented in this file.
 -   #59 : Updated `cuda` clash checker.
 -   #42 : Add support for custom kernel in`cuda`.
 -   #42 : Add Cuda module to Pyccel. Add support for `cuda.synchronize` function.
+-   #41 : Add support for custom device in `cuda`.
 
 ## \[UNRELEASED\]
 
diff --git a/docs/cuda.md b/docs/cuda.md
index de30d52b80..7643a4ac02 100644
--- a/docs/cuda.md
+++ b/docs/cuda.md
@@ -20,4 +20,27 @@ threadsperblock = 1
 # Call your kernel function
 my_kernel[blockspergrid, threadsperblock]()
 
-```
\ No newline at end of file
+```
+
+### device
+
+Device functions are similar to kernels, but are executed within the context of a kernel. They can be called only from kernels or device functions, and are typically used for operations that are too small to justify launching a separate kernel, or for operations that need to be performed repeatedly within the context of a kernel.
+
+```python
+from pyccel.decorators import device, kernel
+
+@device
+def add(x, y):
+    return x + y
+
+@kernel
+def my_kernel():
+    x = 1
+    y = 2
+    z = add(x, y)
+    print(z)
+
+my_kernel[1, 1]()
+
+```
+
diff --git a/pyccel/codegen/printing/cucode.py b/pyccel/codegen/printing/cucode.py
index cd26843017..7c01d93c47 100644
--- a/pyccel/codegen/printing/cucode.py
+++ b/pyccel/codegen/printing/cucode.py
@@ -86,9 +86,10 @@ def function_signature(self, expr, print_arg_names = True):
         str
             Signature of the function.
         """
-        cuda_decorater = '__global__' if 'kernel' in expr.decorators else ''
+        cuda_decorator = '__global__' if 'kernel' in expr.decorators else \
+        '__device__' if 'device' in expr.decorators else ''
         c_function_signature = super().function_signature(expr, print_arg_names)
-        return f'{cuda_decorater} {c_function_signature}'
+        return f'{cuda_decorator} {c_function_signature}'
 
     def _print_KernelCall(self, expr):
         func = expr.funcdef
@@ -109,7 +110,7 @@ def _print_ModuleHeader(self, expr):
         cuda_headers = ""
         for f in expr.module.funcs:
             if not f.is_inline:
-                if 'kernel' in f.decorators:  # Checking for 'kernel' decorator
+                if 'kernel' in f.decorators or 'device' in f.decorators:
                     cuda_headers += self.function_signature(f) + ';\n'
                 else:
                     funcs += self.function_signature(f) + ';\n'
diff --git a/pyccel/decorators.py b/pyccel/decorators.py
index 77717a991f..ff413fe443 100644
--- a/pyccel/decorators.py
+++ b/pyccel/decorators.py
@@ -11,6 +11,7 @@
 __all__ = (
     'allow_negative_index',
     'bypass',
+    'device',
     'elemental',
     'inline',
     'private',
@@ -141,3 +142,21 @@ def __getitem__(self, args):
             return self._f
 
     return KernelAccessor(f)
+
+def device(f):
+    """
+    Decorator for marking a function as a GPU device function.
+
+    When executed in Python this decorator returns `f` unchanged.
+
+    Parameters
+    ----------
+    f : Function
+        The function to be marked as a device.
+
+    Returns
+    -------
+    Function
+        The same function object that was passed in as `f`.
+    """
+    return f
diff --git a/pyccel/errors/messages.py b/pyccel/errors/messages.py
index 09966d810c..5fe622c29b 100644
--- a/pyccel/errors/messages.py
+++ b/pyccel/errors/messages.py
@@ -166,7 +166,7 @@
 INVALID_KERNEL_LAUNCH_CONFIG = 'Expected exactly 2 parameters for kernel launch'
 INVALID_KERNEL_CALL_BP_GRID = 'Invalid Block per grid parameter for Kernel call'
 INVALID_KERNEL_CALL_TP_BLOCK = 'Invalid Thread per Block parameter for Kernel call'
-
+INVAlID_DEVICE_CALL = 'A function decorated with "device" should be called only from a "kernel" or another "device" function.'
 
 
 
diff --git a/pyccel/parser/semantic.py b/pyccel/parser/semantic.py
index 29e851b20c..6b4143b442 100644
--- a/pyccel/parser/semantic.py
+++ b/pyccel/parser/semantic.py
@@ -136,9 +136,10 @@
         UNUSED_DECORATORS, UNSUPPORTED_POINTER_RETURN_VALUE, PYCCEL_RESTRICTION_OPTIONAL_NONE,
         PYCCEL_RESTRICTION_PRIMITIVE_IMMUTABLE, PYCCEL_RESTRICTION_IS_ISNOT,
         FOUND_DUPLICATED_IMPORT, UNDEFINED_WITH_ACCESS, MACRO_MISSING_HEADER_OR_FUNC, PYCCEL_RESTRICTION_INHOMOG_SET,
-        MISSING_KERNEL_CONFIGURATION,
+        MISSING_KERNEL_CONFIGURATION, INVAlID_DEVICE_CALL,
         INVALID_KERNEL_LAUNCH_CONFIG, INVALID_KERNEL_CALL_BP_GRID, INVALID_KERNEL_CALL_TP_BLOCK)
 
+
 from pyccel.parser.base      import BasicParser
 from pyccel.parser.syntactic import SyntaxParser
 
@@ -1061,6 +1062,10 @@ def _handle_function(self, expr, func, args, *, is_method = False, use_build_fun
         FunctionCall/PyccelFunction
             The semantic representation of the call.
         """
+
+        if isinstance(func, FunctionDef) and 'device' in func.decorators:
+            if 'kernel' not in self.scope.decorators and 'device' not in self.scope.decorators:
+                errors.report(INVAlID_DEVICE_CALL,symbol=expr, severity='fatal')
         if isinstance(func, PyccelFunctionDef):
             if use_build_functions:
                 annotation_method = '_build_' + func.cls_name.__name__
diff --git a/tests/cuda/test_device_semantic.py b/tests/cuda/test_device_semantic.py
new file mode 100644
index 0000000000..5723991961
--- /dev/null
+++ b/tests/cuda/test_device_semantic.py
@@ -0,0 +1,31 @@
+# pylint: disable=missing-function-docstring, missing-module-docstring
+import pytest
+
+from pyccel import epyccel
+from pyccel.decorators import device
+from pyccel.errors.errors import Errors, PyccelSemanticError
+from pyccel.errors.messages import (INVAlID_DEVICE_CALL,)
+
+
+@pytest.mark.cuda
+def test_invalid_device_call():
+    def invalid_device_call():
+        @device
+        def device_call():
+            pass
+        def fake_kernel_call():
+            device_call()
+
+        fake_kernel_call()
+
+    error_log = Errors()
+    # fake_kernel_call carries neither the kernel nor the device decorator.
+    with pytest.raises(PyccelSemanticError):
+        epyccel(invalid_device_call, language="cuda")
+
+    assert error_log.has_errors()
+
+    assert error_log.num_messages() == 1
+
+    first_error = list(error_log.error_info_map.values())[0][0]
+    assert first_error.message == INVAlID_DEVICE_CALL
diff --git a/tests/pyccel/scripts/kernel/device_test.py b/tests/pyccel/scripts/kernel/device_test.py
new file mode 100644
index 0000000000..a4762a6242
--- /dev/null
+++ b/tests/pyccel/scripts/kernel/device_test.py
@@ -0,0 +1,18 @@
+# pylint: disable=missing-function-docstring, missing-module-docstring
+from pyccel.decorators import device, kernel
+from pyccel import cuda
+
+@device
+def device_call():
+    print("Hello from device")
+
+@kernel
+def kernel_call():
+    device_call()
+
+def f():
+    kernel_call[1,1]()
+    cuda.synchronize()
+
+if __name__ == '__main__':
+    f()
diff --git a/tests/pyccel/test_pyccel.py b/tests/pyccel/test_pyccel.py
index b4757a3c31..2d55c6e1cb 100644
--- a/tests/pyccel/test_pyccel.py
+++ b/tests/pyccel/test_pyccel.py
@@ -742,6 +742,14 @@ def test_kernel_collision(gpu_available):
     pyccel_test("scripts/kernel/kernel_name_collision.py",
             language="cuda", execute_code=gpu_available)
 
+#------------------------------------------------------------------------------
+
+@pytest.mark.cuda
+def test_device_call(gpu_available):
+    # execute_code gates running the generated program and comparing its output.
+    pyccel_test("scripts/kernel/device_test.py", language="cuda",
+            output_dtype=str, execute_code=gpu_available)
+
 #------------------------------------------------------------------------------
 def test_print_strings(language):
     types = str