Merge pull request #2282 from devitocodes/sycl-init
compiler: Misc code generation improvements
mloubout authored Dec 21, 2023
2 parents 3126fb0 + 5f63560 commit c888cee
Showing 26 changed files with 986 additions and 387 deletions.
23 changes: 22 additions & 1 deletion .github/workflows/docker-bases.yml
@@ -10,6 +10,7 @@ on:
- '/docker/Dockerfile.nvidia'
- '/docker/Dockerfile.cpu'
- '/docker/Dockerfile.amd'
- '/docker/Dockerfile.intel'
workflow_dispatch:
inputs:
tags:
@@ -95,12 +96,32 @@ jobs:
uses: docker/build-push-action@v3
with:
context: .
file: './docker/Dockerfile.cpu'
file: './docker/Dockerfile.intel'
push: true
target: 'icx'
build-args: 'arch=icx'
tags: 'devitocodes/bases:cpu-icx'

- name: SYCL CPU image
uses: docker/build-push-action@v3
with:
context: .
file: './docker/Dockerfile.intel'
push: true
target: 'cpu-sycl'
build-args: 'arch=cpu-sycl'
tags: 'devitocodes/bases:cpu-sycl'

- name: SYCL GPU image
uses: docker/build-push-action@v3
with:
context: .
file: './docker/Dockerfile.intel'
push: true
target: 'gpu-sycl'
build-args: 'arch=gpu-sycl'
tags: 'devitocodes/bases:gpu-sycl'

#######################################################
################### Nvidia nvhpc ######################
#######################################################
10 changes: 7 additions & 3 deletions devito/arch/archinfo.py
@@ -29,7 +29,7 @@
# Generic GPUs
'AMDGPUX', 'NVIDIAX', 'INTELGPUX',
# Intel GPUs
'PVC']
'PVC', 'INTELGPUMAX', 'MAX1100', 'MAX1550']


@memoized_func
@@ -848,10 +848,14 @@ def march(cls):

# Devices
NVIDIAX = NvidiaDevice('nvidiaX')

AMDGPUX = AmdDevice('amdgpuX')
INTELGPUX = IntelDevice('intelgpuX')

PVC = IntelDevice('pvc', max_threads_per_block=4096) # Intel Ponte Vecchio GPU
INTELGPUX = IntelDevice('intelgpuX')
PVC = IntelDevice('pvc', max_threads_per_block=4096) # Legacy codename for MAX GPUs
INTELGPUMAX = IntelDevice('intelgpuMAX', max_threads_per_block=4096)
MAX1100 = IntelDevice('max1100', max_threads_per_block=4096)
MAX1550 = IntelDevice('max1550', max_threads_per_block=4096)

platform_registry = Platform.registry
platform_registry['cpu64'] = get_platform # Autodetection
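
The new Intel MAX devices are registered in platform_registry like any other platform, so they should be selectable by name through the usual configuration mechanism. A minimal sketch only; the registry keys are assumed to match the platform names defined above (e.g. 'max1550'):

    # Sketch only: select one of the new Intel MAX targets by name.
    # Assumes platform_registry is keyed by the platform names defined above.
    from devito import configuration
    from devito.arch import platform_registry

    print(sorted(platform_registry))   # should now include 'intelgpuMAX', 'max1100', 'max1550'

    # Roughly equivalent to exporting DEVITO_PLATFORM=max1550 before running.
    configuration['platform'] = 'max1550'
    print(configuration['platform'].max_threads_per_block)   # 4096, per the diff above
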
113 changes: 83 additions & 30 deletions devito/arch/compiler.py
@@ -2,18 +2,20 @@
from hashlib import sha1
from os import environ, path, makedirs
from packaging.version import Version
from subprocess import DEVNULL, PIPE, CalledProcessError, check_output, check_call, run
from subprocess import (DEVNULL, PIPE, CalledProcessError, check_output,
check_call, run)
import platform
import warnings
import sys
import time

import numpy.ctypeslib as npct
from codepy.jit import compile_from_string
from codepy.toolchain import GCCToolchain, call_capture_output as _call_capture_output
from codepy.toolchain import (GCCToolchain,
call_capture_output as _call_capture_output)

from devito.arch import (AMDGPUX, Cpu64, M1, NVIDIAX, POWER8, POWER9, GRAVITON,
INTELGPUX, PVC, get_nvidia_cc, check_cuda_runtime,
IntelDevice, get_nvidia_cc, check_cuda_runtime,
get_m1_llvm_path)
from devito.exceptions import CompilationError
from devito.logger import debug, warning, error
@@ -716,7 +718,6 @@ def __lookup_cmds__(self):
class IntelCompiler(Compiler):

def __init_finalize__(self, **kwargs):

platform = kwargs.pop('platform', configuration['platform'])
language = kwargs.pop('language', configuration['language'])

@@ -734,13 +735,20 @@ def __init_finalize__(self, **kwargs):
if language == 'openmp':
self.ldflags.append('-qopenmp')

# Make sure the MPI compiler uses `icc` underneath -- whatever the MPI distro is
if kwargs.get('mpi'):
mpi_distro = sniff_mpi_distro('mpiexec')
if mpi_distro != 'IntelMPI':
warning("Expected Intel MPI distribution with `%s`, but found `%s`"
% (self.__class__.__name__, mpi_distro))
self.cflags.insert(0, '-cc=%s' % self.CC)
self.__init_intel_mpi__()
self.__init_intel_mpi_flags__()

def __init_intel_mpi__(self, **kwargs):
# Make sure the MPI compiler uses an Intel compiler underneath,
# whatever the MPI distro is
mpi_distro = sniff_mpi_distro('mpiexec')
if mpi_distro != 'IntelMPI':
warning("Expected Intel MPI distribution with `%s`, but found `%s`"
% (self.__class__.__name__, mpi_distro))

def __init_intel_mpi_flags__(self, **kwargs):
self.cflags.insert(0, '-cc=%s' % self.CC)

def get_version(self):
if configuration['mpi']:
@@ -792,36 +800,80 @@ def __init_finalize__(self, **kwargs):
platform = kwargs.pop('platform', configuration['platform'])
language = kwargs.pop('language', configuration['language'])

# Earlier versions to OneAPI 2023.2.0 (clang17 underneath), have an OpenMP bug
if self.version < Version('17.0.0') and language == 'openmp':
self.ldflags.remove('-qopenmp')
self.ldflags.append('-fopenmp')

if language == 'sycl':
self.cflags.append('-fsycl')
if platform is NVIDIAX:
self.cflags.append('-fsycl-targets=nvptx64-cuda')
else:
self.cflags.append('-fsycl-targets=spir64')
raise ValueError("Use SyclCompiler to jit-compile sycl")

elif language == 'openmp':
# Earlier versions to OneAPI 2023.2.0 (clang17 underneath), have an
# OpenMP bug concerning reductions, hence with them we're forced to
# use the obsolete -fopenmp
if self.version < Version('17.0.0'):
self.ldflags.remove('-qopenmp')
self.ldflags.append('-fopenmp')

if platform is NVIDIAX:
self.cflags.append('-fopenmp-targets=nvptx64-cuda')
if platform in [INTELGPUX, PVC]:
self.ldflags.append('-fiopenmp')
self.ldflags.append('-fopenmp-targets=spir64')
self.ldflags.append('-fopenmp-target-simd')
elif isinstance(platform, IntelDevice):
self.cflags.append('-fiopenmp')
self.cflags.append('-fopenmp-targets=spir64')
self.cflags.append('-fopenmp-target-simd')

self.cflags.remove('-g') # -g disables some optimizations in IGC
self.cflags.append('-gline-tables-only')
self.cflags.append('-fdebug-info-for-profiling')

def __init_intel_mpi__(self, **kwargs):
IntelCompiler.__init_intel_mpi__(self, **kwargs)

self.cflags.remove('-g') # -g disables some optimizations in IGC
self.cflags.append('-gline-tables-only')
self.cflags.append('-fdebug-info-for-profiling')
platform = kwargs.pop('platform', configuration['platform'])

# The Intel toolchain requires the I_MPI_OFFLOAD env var to be set
# to enable GPU-aware MPI (that is, passing device pointers to MPI calls)
if isinstance(platform, IntelDevice):
environ['I_MPI_OFFLOAD'] = '1'

def __init_intel_mpi_flags__(self, **kwargs):
pass

get_version = Compiler.get_version

def __lookup_cmds__(self):
# OneAPI HPC ToolKit comes with icpx, which is clang++,
# and icx, which is clang
self.CC = 'icx'
self.CXX = 'icpx'
self.MPICC = 'mpicc'
self.MPICXX = 'mpicxx'
self.MPICC = 'mpiicx'
self.MPICXX = 'mpiicpx'


class SyclCompiler(OneapiCompiler):

_cpp = True

def __init_finalize__(self, **kwargs):
IntelCompiler.__init_finalize__(self, **kwargs)

platform = kwargs.pop('platform', configuration['platform'])
language = kwargs.pop('language', configuration['language'])

if language != 'sycl':
raise ValueError("Expected language sycl with SyclCompiler")

self.cflags.remove('-std=c99')
self.cflags.append('-fsycl')

self.cflags.remove('-g') # -g disables some optimizations in IGC
self.cflags.append('-gline-tables-only')
self.cflags.append('-fdebug-info-for-profiling')

if isinstance(platform, Cpu64):
pass
elif platform is NVIDIAX:
self.cflags.append('-fsycl-targets=nvptx64-cuda')
elif isinstance(platform, IntelDevice):
self.cflags.append('-fsycl-targets=spir64')
else:
raise NotImplementedError("Unsupported platform %s" % platform)


class CustomCompiler(Compiler):
Expand All @@ -845,7 +897,7 @@ def __new__(cls, *args, **kwargs):

if platform is M1:
_base = ClangCompiler
elif platform is INTELGPUX:
elif isinstance(platform, IntelDevice):
_base = OneapiCompiler
elif platform is NVIDIAX:
if language == 'cuda':
@@ -915,6 +967,7 @@ def __new_with__(self, **kwargs):
'intel': OneapiCompiler,
'icx': OneapiCompiler,
'icpx': OneapiCompiler,
'sycl': SyclCompiler,
'icc': IntelCompiler,
'icpc': IntelCompiler,
'intel-knl': IntelKNLCompiler,
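
Together with the new 'sycl' entry in the compiler registry above, SyclCompiler should be reachable through the standard Devito knobs rather than by instantiating it directly. A rough usage sketch, assuming 'sycl' is an accepted value for both DEVITO_ARCH and DEVITO_LANGUAGE at this commit:

    # Sketch only: route jit-compilation through the new SyclCompiler.
    # The equivalent environment settings would be DEVITO_ARCH=sycl, DEVITO_LANGUAGE=sycl,
    # DEVITO_PLATFORM=intelgpuMAX (names assumed from the diffs above).
    from devito import configuration

    configuration['compiler'] = 'sycl'          # resolved to SyclCompiler via compiler_registry
    configuration['language'] = 'sycl'          # SyclCompiler raises ValueError otherwise
    configuration['platform'] = 'intelgpuMAX'   # any IntelDevice selects -fsycl-targets=spir64
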
7 changes: 4 additions & 3 deletions devito/finite_differences/differentiable.py
@@ -682,11 +682,12 @@ def __init_finalize__(self, *args, **kwargs):
assert isinstance(weights, (list, tuple, np.ndarray))

# Normalize `weights`
weights = tuple(sympy.sympify(i) for i in weights)
from devito.symbolics import pow_to_mul # noqa, sigh
weights = tuple(pow_to_mul(sympy.sympify(i)) for i in weights)

self._spacings = set().union(*[i.find(Spacing) for i in weights])

kwargs['scope'] = 'constant'
kwargs['scope'] = kwargs.get('scope', 'stack')
kwargs['initvalue'] = weights

super().__init_finalize__(*args, **kwargs)
@@ -701,7 +702,7 @@ def __eq__(self, other):
__hash__ = sympy.Basic.__hash__

def _hashable_content(self):
return (self.name, self.dimension, str(self.weights))
return (self.name, self.dimension, str(self.weights), self.scope)

@property
def dimension(self):
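
The weights normalization above now passes every coefficient through pow_to_mul before it is stored, so integer powers of the grid spacings end up as plain products in the generated code. A small illustration of the intended effect, sketch only; it assumes pow_to_mul unfolds positive integer powers into unevaluated products:

    # Sketch only: what the added pow_to_mul normalization does to a weight.
    import sympy
    from devito.symbolics import pow_to_mul

    h_x = sympy.Symbol('h_x')
    w = 3*h_x**2                 # a toy weight expression with an integer power
    print(pow_to_mul(w))         # expected: 3*h_x*h_x (unevaluated product)
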
(Diffs for the remaining 22 changed files are not shown here.)
