Merge pull request #2282 from devitocodes/sycl-init
compiler: Misc code generation improvements
mloubout authored Dec 21, 2023
2 parents 3126fb0 + 5f63560 commit c888cee
Showing 26 changed files with 986 additions and 387 deletions.
23 changes: 22 additions & 1 deletion .github/workflows/docker-bases.yml
@@ -10,6 +10,7 @@ on:
- '/docker/Dockerfile.nvidia'
- '/docker/Dockerfile.cpu'
- '/docker/Dockerfile.amd'
- '/docker/Dockerfile.intel'
workflow_dispatch:
inputs:
tags:
@@ -95,12 +96,32 @@ jobs:
uses: docker/build-push-action@v3
with:
context: .
file: './docker/Dockerfile.cpu'
file: './docker/Dockerfile.intel'
push: true
target: 'icx'
build-args: 'arch=icx'
tags: 'devitocodes/bases:cpu-icx'

- name: SYCL CPU image
uses: docker/build-push-action@v3
with:
context: .
file: './docker/Dockerfile.intel'
push: true
target: 'cpu-sycl'
build-args: 'arch=cpu-sycl'
tags: 'devitocodes/bases:cpu-sycl'

- name: SYCL GPU image
uses: docker/build-push-action@v3
with:
context: .
file: './docker/Dockerfile.intel'
push: true
target: 'gpu-sycl'
build-args: 'arch=gpu-sycl'
tags: 'devitocodes/bases:gpu-sycl'

#######################################################
################### Nvidia nvhpc ######################
#######################################################
10 changes: 7 additions & 3 deletions devito/arch/archinfo.py
@@ -29,7 +29,7 @@
# Generic GPUs
'AMDGPUX', 'NVIDIAX', 'INTELGPUX',
# Intel GPUs
'PVC']
'PVC', 'INTELGPUMAX', 'MAX1100', 'MAX1550']


@memoized_func
@@ -848,10 +848,14 @@ def march(cls):

# Devices
NVIDIAX = NvidiaDevice('nvidiaX')

AMDGPUX = AmdDevice('amdgpuX')
INTELGPUX = IntelDevice('intelgpuX')

PVC = IntelDevice('pvc', max_threads_per_block=4096) # Intel Ponte Vecchio GPU
INTELGPUX = IntelDevice('intelgpuX')
PVC = IntelDevice('pvc', max_threads_per_block=4096) # Legacy codename for MAX GPUs
INTELGPUMAX = IntelDevice('intelgpuMAX', max_threads_per_block=4096)
MAX1100 = IntelDevice('max1100', max_threads_per_block=4096)
MAX1550 = IntelDevice('max1550', max_threads_per_block=4096)

platform_registry = Platform.registry
platform_registry['cpu64'] = get_platform # Autodetection
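
The new Intel MAX devices are registered in platform_registry like any other platform, so they should be selectable by name through the usual configuration mechanism. A minimal sketch only; the registry keys are assumed to match the platform names defined above (e.g. 'max1550'):

    # Sketch only: select one of the new Intel MAX targets by name.
    # Assumes platform_registry is keyed by the platform names defined above.
    from devito import configuration
    from devito.arch import platform_registry

    print(sorted(platform_registry))   # should now include 'intelgpuMAX', 'max1100', 'max1550'

    # Roughly equivalent to exporting DEVITO_PLATFORM=max1550 before running.
    configuration['platform'] = 'max1550'
    print(configuration['platform'].max_threads_per_block)   # 4096, per the diff above
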
113 changes: 83 additions & 30 deletions devito/arch/compiler.py
@@ -2,18 +2,20 @@
from hashlib import sha1
from os import environ, path, makedirs
from packaging.version import Version
from subprocess import DEVNULL, PIPE, CalledProcessError, check_output, check_call, run
from subprocess import (DEVNULL, PIPE, CalledProcessError, check_output,
check_call, run)
import platform
import warnings
import sys
import time

import numpy.ctypeslib as npct
from codepy.jit import compile_from_string
from codepy.toolchain import GCCToolchain, call_capture_output as _call_capture_output
from codepy.toolchain import (GCCToolchain,
call_capture_output as _call_capture_output)

from devito.arch import (AMDGPUX, Cpu64, M1, NVIDIAX, POWER8, POWER9, GRAVITON,
INTELGPUX, PVC, get_nvidia_cc, check_cuda_runtime,
IntelDevice, get_nvidia_cc, check_cuda_runtime,
get_m1_llvm_path)
from devito.exceptions import CompilationError
from devito.logger import debug, warning, error
@@ -716,7 +718,6 @@ def __lookup_cmds__(self):
class IntelCompiler(Compiler):

def __init_finalize__(self, **kwargs):

platform = kwargs.pop('platform', configuration['platform'])
language = kwargs.pop('language', configuration['language'])

@@ -734,13 +735,20 @@ def __init_finalize__(self, **kwargs):
if language == 'openmp':
self.ldflags.append('-qopenmp')

# Make sure the MPI compiler uses `icc` underneath -- whatever the MPI distro is
if kwargs.get('mpi'):
mpi_distro = sniff_mpi_distro('mpiexec')
if mpi_distro != 'IntelMPI':
warning("Expected Intel MPI distribution with `%s`, but found `%s`"
% (self.__class__.__name__, mpi_distro))
self.cflags.insert(0, '-cc=%s' % self.CC)
self.__init_intel_mpi__()
self.__init_intel_mpi_flags__()

def __init_intel_mpi__(self, **kwargs):
# Make sure the MPI compiler uses an Intel compiler underneath,
# whatever the MPI distro is
mpi_distro = sniff_mpi_distro('mpiexec')
if mpi_distro != 'IntelMPI':
warning("Expected Intel MPI distribution with `%s`, but found `%s`"
% (self.__class__.__name__, mpi_distro))

def __init_intel_mpi_flags__(self, **kwargs):
self.cflags.insert(0, '-cc=%s' % self.CC)

def get_version(self):
if configuration['mpi']:
@@ -792,36 +800,80 @@ def __init_finalize__(self, **kwargs):
platform = kwargs.pop('platform', configuration['platform'])
language = kwargs.pop('language', configuration['language'])

# Earlier versions to OneAPI 2023.2.0 (clang17 underneath), have an OpenMP bug
if self.version < Version('17.0.0') and language == 'openmp':
self.ldflags.remove('-qopenmp')
self.ldflags.append('-fopenmp')

if language == 'sycl':
self.cflags.append('-fsycl')
if platform is NVIDIAX:
self.cflags.append('-fsycl-targets=nvptx64-cuda')
else:
self.cflags.append('-fsycl-targets=spir64')
raise ValueError("Use SyclCompiler to jit-compile sycl")

elif language == 'openmp':
# Earlier versions to OneAPI 2023.2.0 (clang17 underneath), have an
# OpenMP bug concerning reductions, hence with them we're forced to
# use the obsolete -fopenmp
if self.version < Version('17.0.0'):
self.ldflags.remove('-qopenmp')
self.ldflags.append('-fopenmp')

if platform is NVIDIAX:
self.cflags.append('-fopenmp-targets=nvptx64-cuda')
if platform in [INTELGPUX, PVC]:
self.ldflags.append('-fiopenmp')
self.ldflags.append('-fopenmp-targets=spir64')
self.ldflags.append('-fopenmp-target-simd')
elif isinstance(platform, IntelDevice):
self.cflags.append('-fiopenmp')
self.cflags.append('-fopenmp-targets=spir64')
self.cflags.append('-fopenmp-target-simd')

self.cflags.remove('-g') # -g disables some optimizations in IGC
self.cflags.append('-gline-tables-only')
self.cflags.append('-fdebug-info-for-profiling')

def __init_intel_mpi__(self, **kwargs):
IntelCompiler.__init_intel_mpi__(self, **kwargs)

self.cflags.remove('-g') # -g disables some optimizations in IGC
self.cflags.append('-gline-tables-only')
self.cflags.append('-fdebug-info-for-profiling')
platform = kwargs.pop('platform', configuration['platform'])

# The Intel toolchain requires the I_MPI_OFFLOAD env var to be set
# to enable GPU-aware MPI (that is, passing device pointers to MPI calls)
if isinstance(platform, IntelDevice):
environ['I_MPI_OFFLOAD'] = '1'

def __init_intel_mpi_flags__(self, **kwargs):
pass

get_version = Compiler.get_version

def __lookup_cmds__(self):
# OneAPI HPC ToolKit comes with icpx, which is clang++,
# and icx, which is clang
self.CC = 'icx'
self.CXX = 'icpx'
self.MPICC = 'mpicc'
self.MPICXX = 'mpicxx'
self.MPICC = 'mpiicx'
self.MPICXX = 'mpiicpx'


class SyclCompiler(OneapiCompiler):

_cpp = True

def __init_finalize__(self, **kwargs):
IntelCompiler.__init_finalize__(self, **kwargs)

platform = kwargs.pop('platform', configuration['platform'])
language = kwargs.pop('language', configuration['language'])

if language != 'sycl':
raise ValueError("Expected language sycl with SyclCompiler")

self.cflags.remove('-std=c99')
self.cflags.append('-fsycl')

self.cflags.remove('-g') # -g disables some optimizations in IGC
self.cflags.append('-gline-tables-only')
self.cflags.append('-fdebug-info-for-profiling')

if isinstance(platform, Cpu64):
pass
elif platform is NVIDIAX:
self.cflags.append('-fsycl-targets=nvptx64-cuda')
elif isinstance(platform, IntelDevice):
self.cflags.append('-fsycl-targets=spir64')
else:
raise NotImplementedError("Unsupported platform %s" % platform)


class CustomCompiler(Compiler):
Expand All @@ -845,7 +897,7 @@ def __new__(cls, *args, **kwargs):

if platform is M1:
_base = ClangCompiler
elif platform is INTELGPUX:
elif isinstance(platform, IntelDevice):
_base = OneapiCompiler
elif platform is NVIDIAX:
if language == 'cuda':
@@ -915,6 +967,7 @@ def __new_with__(self, **kwargs):
'intel': OneapiCompiler,
'icx': OneapiCompiler,
'icpx': OneapiCompiler,
'sycl': SyclCompiler,
'icc': IntelCompiler,
'icpc': IntelCompiler,
'intel-knl': IntelKNLCompiler,
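
Together with the new 'sycl' entry in the compiler registry above, SyclCompiler should be reachable through the standard Devito knobs rather than by instantiating it directly. A rough usage sketch, assuming 'sycl' is an accepted value for both DEVITO_ARCH and DEVITO_LANGUAGE at this commit:

    # Sketch only: route jit-compilation through the new SyclCompiler.
    # The equivalent environment settings would be DEVITO_ARCH=sycl, DEVITO_LANGUAGE=sycl,
    # DEVITO_PLATFORM=intelgpuMAX (names assumed from the diffs above).
    from devito import configuration

    configuration['compiler'] = 'sycl'          # resolved to SyclCompiler via compiler_registry
    configuration['language'] = 'sycl'          # SyclCompiler raises ValueError otherwise
    configuration['platform'] = 'intelgpuMAX'   # any IntelDevice selects -fsycl-targets=spir64
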
7 changes: 4 additions & 3 deletions devito/finite_differences/differentiable.py
@@ -682,11 +682,12 @@ def __init_finalize__(self, *args, **kwargs):
assert isinstance(weights, (list, tuple, np.ndarray))

# Normalize `weights`
weights = tuple(sympy.sympify(i) for i in weights)
from devito.symbolics import pow_to_mul # noqa, sigh
weights = tuple(pow_to_mul(sympy.sympify(i)) for i in weights)

self._spacings = set().union(*[i.find(Spacing) for i in weights])

kwargs['scope'] = 'constant'
kwargs['scope'] = kwargs.get('scope', 'stack')
kwargs['initvalue'] = weights

super().__init_finalize__(*args, **kwargs)
@@ -701,7 +702,7 @@ def __eq__(self, other):
__hash__ = sympy.Basic.__hash__

def _hashable_content(self):
return (self.name, self.dimension, str(self.weights))
return (self.name, self.dimension, str(self.weights), self.scope)

@property
def dimension(self):
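
The weights normalization above now passes every coefficient through pow_to_mul before it is stored, so integer powers of the grid spacings end up as plain products in the generated code. A small illustration of the intended effect, sketch only; it assumes pow_to_mul unfolds positive integer powers into unevaluated products:

    # Sketch only: what the added pow_to_mul normalization does to a weight.
    import sympy
    from devito.symbolics import pow_to_mul

    h_x = sympy.Symbol('h_x')
    w = 3*h_x**2                 # a toy weight expression with an integer power
    print(pow_to_mul(w))         # expected: 3*h_x*h_x (unevaluated product)
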
(Diffs for the remaining 22 changed files are not shown here.)
