diff --git a/.github/workflows/docker-bases.yml b/.github/workflows/docker-bases.yml index 96846320fc..585ea16911 100644 --- a/.github/workflows/docker-bases.yml +++ b/.github/workflows/docker-bases.yml @@ -10,6 +10,7 @@ on: - '/docker/Dockerfile.nvidia' - '/docker/Dockerfile.cpu' - '/docker/Dockerfile.amd' + - '/docker/Dockerfile.intel' workflow_dispatch: inputs: tags: @@ -95,12 +96,32 @@ jobs: uses: docker/build-push-action@v3 with: context: . - file: './docker/Dockerfile.cpu' + file: './docker/Dockerfile.intel' push: true target: 'icx' build-args: 'arch=icx' tags: 'devitocodes/bases:cpu-icx' + - name: SYCL CPU image + uses: docker/build-push-action@v3 + with: + context: . + file: './docker/Dockerfile.intel' + push: true + target: 'cpu-sycl' + build-args: 'arch=cpu-sycl' + tags: 'devitocodes/bases:cpu-sycl' + + - name: SYCL GPU image + uses: docker/build-push-action@v3 + with: + context: . + file: './docker/Dockerfile.intel' + push: true + target: 'gpu-sycl' + build-args: 'arch=gpu-sycl' + tags: 'devitocodes/bases:gpu-sycl' + ####################################################### ################### Nvidia nvhpc ###################### ####################################################### diff --git a/devito/arch/archinfo.py b/devito/arch/archinfo.py index cccef94712..7563359669 100644 --- a/devito/arch/archinfo.py +++ b/devito/arch/archinfo.py @@ -29,7 +29,7 @@ # Generic GPUs 'AMDGPUX', 'NVIDIAX', 'INTELGPUX', # Intel GPUs - 'PVC'] + 'PVC', 'INTELGPUMAX', 'MAX1100', 'MAX1550'] @memoized_func @@ -848,10 +848,14 @@ def march(cls): # Devices NVIDIAX = NvidiaDevice('nvidiaX') + AMDGPUX = AmdDevice('amdgpuX') -INTELGPUX = IntelDevice('intelgpuX') -PVC = IntelDevice('pvc', max_threads_per_block=4096) # Intel Ponte Vecchio GPU +INTELGPUX = IntelDevice('intelgpuX') +PVC = IntelDevice('pvc', max_threads_per_block=4096) # Legacy codename for MAX GPUs +INTELGPUMAX = IntelDevice('intelgpuMAX', max_threads_per_block=4096) +MAX1100 = IntelDevice('max1100', 
max_threads_per_block=4096) +MAX1550 = IntelDevice('max1550', max_threads_per_block=4096) platform_registry = Platform.registry platform_registry['cpu64'] = get_platform # Autodetection diff --git a/devito/arch/compiler.py b/devito/arch/compiler.py index 08f509c5fe..3b102f1822 100644 --- a/devito/arch/compiler.py +++ b/devito/arch/compiler.py @@ -2,7 +2,8 @@ from hashlib import sha1 from os import environ, path, makedirs from packaging.version import Version -from subprocess import DEVNULL, PIPE, CalledProcessError, check_output, check_call, run +from subprocess import (DEVNULL, PIPE, CalledProcessError, check_output, + check_call, run) import platform import warnings import sys @@ -10,10 +11,11 @@ import numpy.ctypeslib as npct from codepy.jit import compile_from_string -from codepy.toolchain import GCCToolchain, call_capture_output as _call_capture_output +from codepy.toolchain import (GCCToolchain, + call_capture_output as _call_capture_output) from devito.arch import (AMDGPUX, Cpu64, M1, NVIDIAX, POWER8, POWER9, GRAVITON, - INTELGPUX, PVC, get_nvidia_cc, check_cuda_runtime, + IntelDevice, get_nvidia_cc, check_cuda_runtime, get_m1_llvm_path) from devito.exceptions import CompilationError from devito.logger import debug, warning, error @@ -716,7 +718,6 @@ def __lookup_cmds__(self): class IntelCompiler(Compiler): def __init_finalize__(self, **kwargs): - platform = kwargs.pop('platform', configuration['platform']) language = kwargs.pop('language', configuration['language']) @@ -734,13 +735,20 @@ def __init_finalize__(self, **kwargs): if language == 'openmp': self.ldflags.append('-qopenmp') - # Make sure the MPI compiler uses `icc` underneath -- whatever the MPI distro is if kwargs.get('mpi'): - mpi_distro = sniff_mpi_distro('mpiexec') - if mpi_distro != 'IntelMPI': - warning("Expected Intel MPI distribution with `%s`, but found `%s`" - % (self.__class__.__name__, mpi_distro)) - self.cflags.insert(0, '-cc=%s' % self.CC) + self.__init_intel_mpi__() + 
self.__init_intel_mpi_flags__() + + def __init_intel_mpi__(self, **kwargs): + # Make sure the MPI compiler uses an Intel compiler underneath, + # whatever the MPI distro is + mpi_distro = sniff_mpi_distro('mpiexec') + if mpi_distro != 'IntelMPI': + warning("Expected Intel MPI distribution with `%s`, but found `%s`" + % (self.__class__.__name__, mpi_distro)) + + def __init_intel_mpi_flags__(self, **kwargs): + self.cflags.insert(0, '-cc=%s' % self.CC) def get_version(self): if configuration['mpi']: @@ -792,36 +800,80 @@ def __init_finalize__(self, **kwargs): platform = kwargs.pop('platform', configuration['platform']) language = kwargs.pop('language', configuration['language']) - # Earlier versions to OneAPI 2023.2.0 (clang17 underneath), have an OpenMP bug - if self.version < Version('17.0.0') and language == 'openmp': - self.ldflags.remove('-qopenmp') - self.ldflags.append('-fopenmp') - if language == 'sycl': - self.cflags.append('-fsycl') - if platform is NVIDIAX: - self.cflags.append('-fsycl-targets=nvptx64-cuda') - else: - self.cflags.append('-fsycl-targets=spir64') + raise ValueError("Use SyclCompiler to jit-compile sycl") + + elif language == 'openmp': + # Earlier versions to OneAPI 2023.2.0 (clang17 underneath), have an + # OpenMP bug concerning reductions, hence with them we're forced to + # use the obsolete -fopenmp + if self.version < Version('17.0.0'): + self.ldflags.remove('-qopenmp') + self.ldflags.append('-fopenmp') if platform is NVIDIAX: self.cflags.append('-fopenmp-targets=nvptx64-cuda') - if platform in [INTELGPUX, PVC]: - self.ldflags.append('-fiopenmp') - self.ldflags.append('-fopenmp-targets=spir64') - self.ldflags.append('-fopenmp-target-simd') + elif isinstance(platform, IntelDevice): + self.cflags.append('-fiopenmp') + self.cflags.append('-fopenmp-targets=spir64') + self.cflags.append('-fopenmp-target-simd') + + self.cflags.remove('-g') # -g disables some optimizations in IGC + self.cflags.append('-gline-tables-only') + 
self.cflags.append('-fdebug-info-for-profiling') + + def __init_intel_mpi__(self, **kwargs): + IntelCompiler.__init_intel_mpi__(self, **kwargs) - self.cflags.remove('-g') # -g disables some optimizations in IGC - self.cflags.append('-gline-tables-only') - self.cflags.append('-fdebug-info-for-profiling') + platform = kwargs.pop('platform', configuration['platform']) + + # The Intel toolchain requires the I_MPI_OFFLOAD env var to be set + # to enable GPU-aware MPI (that is, passing device pointers to MPI calls) + if isinstance(platform, IntelDevice): + environ['I_MPI_OFFLOAD'] = '1' + + def __init_intel_mpi_flags__(self, **kwargs): + pass + + get_version = Compiler.get_version def __lookup_cmds__(self): # OneAPI HPC ToolKit comes with icpx, which is clang++, # and icx, which is clang self.CC = 'icx' self.CXX = 'icpx' - self.MPICC = 'mpicc' - self.MPICXX = 'mpicxx' + self.MPICC = 'mpiicx' + self.MPICXX = 'mpiicpx' + + +class SyclCompiler(OneapiCompiler): + + _cpp = True + + def __init_finalize__(self, **kwargs): + IntelCompiler.__init_finalize__(self, **kwargs) + + platform = kwargs.pop('platform', configuration['platform']) + language = kwargs.pop('language', configuration['language']) + + if language != 'sycl': + raise ValueError("Expected language sycl with SyclCompiler") + + self.cflags.remove('-std=c99') + self.cflags.append('-fsycl') + + self.cflags.remove('-g') # -g disables some optimizations in IGC + self.cflags.append('-gline-tables-only') + self.cflags.append('-fdebug-info-for-profiling') + + if isinstance(platform, Cpu64): + pass + elif platform is NVIDIAX: + self.cflags.append('-fsycl-targets=nvptx64-cuda') + elif isinstance(platform, IntelDevice): + self.cflags.append('-fsycl-targets=spir64') + else: + raise NotImplementedError("Unsupported platform %s" % platform) class CustomCompiler(Compiler): @@ -845,7 +897,7 @@ def __new__(cls, *args, **kwargs): if platform is M1: _base = ClangCompiler - elif platform is INTELGPUX: + elif isinstance(platform, 
IntelDevice): _base = OneapiCompiler elif platform is NVIDIAX: if language == 'cuda': @@ -915,6 +967,7 @@ def __new_with__(self, **kwargs): 'intel': OneapiCompiler, 'icx': OneapiCompiler, 'icpx': OneapiCompiler, + 'sycl': SyclCompiler, 'icc': IntelCompiler, 'icpc': IntelCompiler, 'intel-knl': IntelKNLCompiler, diff --git a/devito/finite_differences/differentiable.py b/devito/finite_differences/differentiable.py index 607ab7693b..6cb4d4b45d 100644 --- a/devito/finite_differences/differentiable.py +++ b/devito/finite_differences/differentiable.py @@ -682,11 +682,12 @@ def __init_finalize__(self, *args, **kwargs): assert isinstance(weights, (list, tuple, np.ndarray)) # Normalize `weights` - weights = tuple(sympy.sympify(i) for i in weights) + from devito.symbolics import pow_to_mul # noqa, sigh + weights = tuple(pow_to_mul(sympy.sympify(i)) for i in weights) self._spacings = set().union(*[i.find(Spacing) for i in weights]) - kwargs['scope'] = 'constant' + kwargs['scope'] = kwargs.get('scope', 'stack') kwargs['initvalue'] = weights super().__init_finalize__(*args, **kwargs) @@ -701,7 +702,7 @@ def __eq__(self, other): __hash__ = sympy.Basic.__hash__ def _hashable_content(self): - return (self.name, self.dimension, str(self.weights)) + return (self.name, self.dimension, str(self.weights), self.scope) @property def dimension(self): diff --git a/devito/ir/iet/nodes.py b/devito/ir/iet/nodes.py index 486e7e4b80..136e7bbfee 100644 --- a/devito/ir/iet/nodes.py +++ b/devito/ir/iet/nodes.py @@ -24,7 +24,7 @@ __all__ = ['Node', 'Block', 'Expression', 'Callable', 'Call', 'ExprStmt', 'Conditional', 'Iteration', 'List', 'Section', 'TimedList', 'Prodder', 'MetaCall', 'PointerCast', 'HaloSpot', 'Definition', 'ExpressionBundle', - 'AugmentedExpression', 'Increment', 'Return', 'While', + 'AugmentedExpression', 'Increment', 'Return', 'While', 'ListMajor', 'ParallelIteration', 'ParallelBlock', 'Dereference', 'Lambda', 'SyncSpot', 'Pragma', 'DummyExpr', 'BlankLine', 'ParallelTree', 
'BusyWait', 'CallableBody', 'Transfer'] @@ -132,12 +132,12 @@ def __repr__(self): @property def functions(self): - """All AbstractFunction objects used by this node.""" + """All AbstractFunctions and AbstractObjects used by this node.""" return () @property def expr_symbols(self): - """All symbols appearing in an expression within this node.""" + """All symbols appearing within this node.""" return () @property @@ -243,12 +243,14 @@ class Call(ExprStmt, Node): Explicitly tagging these AbstractFunctions is useful in the case of external calls, that is whenever the compiler would be unable to retrieve that information by analysis of the IET graph. + templates : list of Basic, optional + The template arguments of the Call. """ is_Call = True def __init__(self, name, arguments=None, retobj=None, is_indirect=False, - cast=False, writes=None): + cast=False, writes=None, templates=None): if isinstance(name, CallFromPointer): self.base = name.base else: @@ -259,11 +261,22 @@ def __init__(self, name, arguments=None, retobj=None, is_indirect=False, self.is_indirect = is_indirect self.cast = cast self._writes = as_tuple(writes) + self.templates = as_tuple(templates) def __repr__(self): ret = "" if self.retobj is None else "%s = " % self.retobj return "%sCall::\n\t%s(...)" % (ret, self.name) + def _rebuild(self, *args, **kwargs): + if args: + # Not elegant, but basically it handles the fact that a Call might + # have nested Calls/Lambdas among its `arguments`, and these might + # change, and we are in such a case *if and only if* we have `args` + assert len(args) == len(self.children) + mapper = dict(zip(self.children, args)) + kwargs['arguments'] = [mapper.get(i, i) for i in self.arguments] + return super()._rebuild(**kwargs) + @property def children(self): return tuple(i for i in self.arguments if isinstance(i, (Call, Lambda))) @@ -326,8 +339,6 @@ def expr_symbols(self): @property def defines(self): ret = () - if self.base is not None: - ret += (self.base,) if 
isinstance(self.retobj, Basic): ret += (self.retobj,) return ret @@ -743,8 +754,16 @@ class CallableBody(Node): init : Node, optional A piece of IET to perform some initialization relevant for `body` (e.g., to initialize the target language runtime). + standalones : list of Definitions, optional + Object definitions for `body`. Instantiating these objects does not + require passing any arguments to their constructors, so these + Definitions can be scheduled safely right after `init`. They may or may + not be required by some of the subsequent nodes (e.g., `allocs`, + `maps`). allocs : list of Nodes, optional Data definitions and allocations for `body`. + stacks : list of Definitions, optional + Definitions for the stack-scoped objects appearing in `body`. casts : list of PointerCasts, optional Sequence of PointerCasts required by the `body`. bundles : list of Nodes, optional @@ -756,7 +775,9 @@ class CallableBody(Node): strides : list of Nodes, optional Statements defining symbols used to access linearized arrays. objs : list of Definitions, optional - Object definitions for `body`. + Object definitions for `body`. Instantiating these objects may or may + not require some of the symbols defined in the previous nodes (e.g., + `allocs`, `maps`). unmaps : Transfer or list of Transfer, optional Data unmaps for `body`. 
unbundles : list of Nodes, optional @@ -767,11 +788,13 @@ class CallableBody(Node): is_CallableBody = True - _traversable = ['unpacks', 'init', 'allocs', 'casts', 'bundles', 'maps', - 'strides', 'objs', 'body', 'unmaps', 'unbundles', 'frees'] + _traversable = ['unpacks', 'init', 'standalones', 'allocs', 'stacks', + 'casts', 'bundles', 'maps', 'strides', 'objs', 'body', + 'unmaps', 'unbundles', 'frees'] - def __init__(self, body, init=(), unpacks=(), strides=(), allocs=(), casts=(), - bundles=(), objs=(), maps=(), unmaps=(), unbundles=(), frees=()): + def __init__(self, body, init=(), standalones=(), unpacks=(), strides=(), + allocs=(), stacks=(), casts=(), bundles=(), objs=(), maps=(), + unmaps=(), unbundles=(), frees=()): # Sanity check assert not isinstance(body, CallableBody), "CallableBody's cannot be nested" @@ -779,7 +802,9 @@ def __init__(self, body, init=(), unpacks=(), strides=(), allocs=(), casts=(), self.unpacks = as_tuple(unpacks) self.init = as_tuple(init) + self.standalones = as_tuple(standalones) self.allocs = as_tuple(allocs) + self.stacks = as_tuple(stacks) self.casts = as_tuple(casts) self.strides = as_tuple(strides) self.bundles = as_tuple(bundles) @@ -894,7 +919,12 @@ def __repr__(self): @property def functions(self): - return (self.function,) + ret = [self.function] + for i in self.expr_symbols: + f = i.function + if f.is_AbstractFunction or f.is_AbstractObject: + ret.append(i.function) + return tuple(ret) @property def defines(self): @@ -905,16 +935,25 @@ def defines(self): @property def expr_symbols(self): - if not self.function.is_Array or self.function.initvalue is None: - return () - # These are just a handful of values so it's OK to iterate them over - ret = set() - for i in self.function.initvalue: + f = self.function + if f.is_LocalObject: + ret = set(flatten(i.free_symbols for i in f.cargs)) try: - ret.update(i.free_symbols) + ret.update(f.initvalue.free_symbols) except AttributeError: pass - return tuple(ret) + return tuple(ret) + 
elif f.is_Array and f.initvalue is not None: + # These are just a handful of values so it's OK to iterate them over + ret = set() + for i in f.initvalue: + try: + ret.update(i.free_symbols) + except AttributeError: + pass + return tuple(ret) + else: + return () class PointerCast(ExprStmt, Node): @@ -1044,10 +1083,19 @@ def __init__(self, body, captures=None, parameters=None): def __repr__(self): return "Lambda[%s](%s)" % (self.captures, self.parameters) + @property + def functions(self): + return tuple(i.function for i in self.parameters + if isinstance(i.function, AbstractFunction)) + @cached_property def expr_symbols(self): return tuple(self.parameters) + @property + def defines(self): + return tuple(self.parameters) + class Section(List): @@ -1310,6 +1358,10 @@ def __init__(self, value=None): self.value = value +class ListMajor(List): + pass + + def DummyExpr(*args, init=False): return Expression(DummyEq(*args), init=init) diff --git a/devito/ir/iet/visitors.py b/devito/ir/iet/visitors.py index 38145cac3a..f6e266ed9d 100644 --- a/devito/ir/iet/visitors.py +++ b/devito/ir/iet/visitors.py @@ -15,9 +15,10 @@ from devito.exceptions import VisitorException from devito.ir.iet.nodes import (Node, Iteration, Expression, ExpressionBundle, - Call, Lambda, BlankLine, Section) + Call, Lambda, BlankLine, Section, ListMajor) from devito.ir.support.space import Backward -from devito.symbolics import ListInitializer, ccode, uxreplace +from devito.symbolics import (FieldFromComposite, FieldFromPointer, + ListInitializer, ccode, uxreplace) from devito.tools import (GenericVisitor, as_tuple, ctypes_to_cstr, filter_ordered, filter_sorted, flatten, is_external_ctype, c_restrict_void_p, sorted_priority) @@ -192,10 +193,14 @@ def _gen_struct_decl(self, obj, masked=()): Convert ctypes.Struct -> cgen.Structure. 
""" ctype = obj._C_ctype - while issubclass(ctype, ctypes._Pointer): - ctype = ctype._type_ + try: + while issubclass(ctype, ctypes._Pointer): + ctype = ctype._type_ - if not issubclass(ctype, ctypes.Structure): + if not issubclass(ctype, ctypes.Structure): + return None + except TypeError: + # E.g., `ctype` is of type `dtypes_lowering.CustomDtype` return None try: @@ -223,45 +228,55 @@ def _gen_struct_decl(self, obj, masked=()): return c.Struct(ctype.__name__, entries) - def _gen_value(self, obj, level=2, masked=()): + def _gen_value(self, obj, mode=1, masked=()): + """ + Convert a devito.types.Basic object into a cgen declaration/definition. + + A Basic object may need to be declared and optionally defined in three + different ways, which correspond to the three possible values of `mode`: + + * 0: Simple. E.g., `int a = 1`; + * 1: Comprehensive. E.g., `const int *restrict a`, `int a[10]`; + * 2: Declaration suitable for a function parameter list. + """ qualifiers = [v for k, v in self._qualifiers_mapper.items() if getattr(obj.function, k, False) and v not in masked] - if (obj._mem_stack or obj._mem_constant) and level == 2: + if (obj._mem_stack or obj._mem_constant) and mode == 1: strtype = obj._C_typedata strshape = ''.join('[%s]' % ccode(i) for i in obj.symbolic_shape) else: strtype = ctypes_to_cstr(obj._C_ctype) strshape = '' - if isinstance(obj, (AbstractFunction, IndexedData)) and level >= 1: + if isinstance(obj, (AbstractFunction, IndexedData)) and mode >= 1: if not obj._mem_stack: strtype = '%s%s' % (strtype, self._restrict_keyword) strtype = ' '.join(qualifiers + [strtype]) + if obj.is_LocalObject and obj._C_modifier is not None and mode == 2: + strtype += obj._C_modifier + strname = obj._C_name strobj = '%s%s' % (strname, strshape) - try: - if obj.cargs: - strobj = MultilineCall(strobj, obj.cargs, True) - except AttributeError: - pass + if obj.is_LocalObject and obj.cargs and mode == 1: + arguments = [ccode(i) for i in obj.cargs] + strobj = 
MultilineCall(strobj, arguments, True) value = c.Value(strtype, strobj) try: - if obj.is_AbstractFunction and obj._data_alignment and level == 2: + if obj.is_AbstractFunction and obj._data_alignment and mode == 1: value = c.AlignedAttribute(obj._data_alignment, value) except AttributeError: pass - try: - if obj.initvalue is not None and level == 2: - init = ListInitializer(obj.initvalue) - if not obj._mem_constant or init.is_numeric: - value = c.Initializer(value, ccode(init)) - except AttributeError: - pass + if obj.is_Array and obj.initvalue is not None and mode == 1: + init = ListInitializer(obj.initvalue) + if not obj._mem_constant or init.is_numeric: + value = c.Initializer(value, ccode(init)) + elif obj.is_LocalObject and obj.initvalue is not None and mode == 1: + value = c.Initializer(value, ccode(obj.initvalue)) return value @@ -269,12 +284,17 @@ def _gen_rettype(self, obj): try: return self._gen_value(obj, 0).typename except AttributeError: - assert isinstance(obj, str) + pass + if isinstance(obj, str): return obj + elif isinstance(obj, (FieldFromComposite, FieldFromPointer)): + return self._gen_value(obj.function.base, 0).typename + else: + return None def _args_decl(self, args): """Generate cgen declarations from an iterable of symbols and expressions.""" - return [self._gen_value(i, 1) for i in args] + return [self._gen_value(i, 2) for i in args] def _args_call(self, args): """ @@ -306,7 +326,8 @@ def _blankline_logic(self, children): """ Generate cgen blank lines in between logical units. 
""" - candidates = (Expression, ExpressionBundle, Iteration, Section) + candidates = (Expression, ExpressionBundle, Iteration, Section, + ListMajor) processed = [] for child in children: @@ -327,8 +348,8 @@ def _blankline_logic(self, children): all(i.dim.is_Stencil for i in g)): rebuilt.extend(g) elif (prev in candidates and k in candidates) or \ - (prev is not None and k is Section) or \ - (prev is Section): + (prev is not None and k in (ListMajor, Section)) or \ + (prev in (ListMajor, Section)): rebuilt.append(BlankLine) rebuilt.extend(g) else: @@ -467,16 +488,19 @@ def visit_AugmentedExpression(self, o): def visit_Call(self, o, nested_call=False): retobj = o.retobj - cast = o.cast and self._gen_rettype(retobj) + rettype = self._gen_rettype(retobj) + cast = o.cast and rettype arguments = self._args_call(o.arguments) if retobj is None: - return MultilineCall(o.name, arguments, nested_call, o.is_indirect, cast) + return MultilineCall(o.name, arguments, nested_call, o.is_indirect, + cast, o.templates) else: - call = MultilineCall(o.name, arguments, True, o.is_indirect, cast) - if retobj.is_Indexed: + call = MultilineCall(o.name, arguments, True, o.is_indirect, cast, + o.templates) + if retobj.is_Indexed or \ + isinstance(retobj, (FieldFromComposite, FieldFromPointer)): return c.Assign(ccode(retobj), call) else: - rettype = self._gen_rettype(retobj) return c.Initializer(c.Value(rettype, retobj._C_name), call) def visit_Conditional(self, o): @@ -562,7 +586,13 @@ def visit_CallableBody(self, o): return c.Collection(body) def visit_Lambda(self, o): - body = flatten(self._visit(i) for i in o.children) + body = [] + for i in o.children: + v = self._visit(i) + if v: + if body: + body.append(c.Line()) + body.extend(as_tuple(v)) captures = [str(i) for i in o.captures] decls = [i.inline() for i in self._args_decl(o.parameters)] top = c.Line('[%s](%s)' % (', '.join(captures), ', '.join(decls))) @@ -605,7 +635,8 @@ def _operator_typedecls(self, o, mode='all'): # This is 
essentially to rule out vector types which are declared already # in some external headers - xfilter = lambda i: xfilter1(i) and not is_external_ctype(i._C_ctype, o._includes) + xfilter = lambda i: (xfilter1(i) and + not is_external_ctype(i._C_ctype, o._includes)) candidates = o.parameters + tuple(o._dspace.parts) typedecls = [self._gen_struct_decl(i) for i in candidates if xfilter(i)] @@ -645,6 +676,11 @@ def visit_Operator(self, o, mode='all'): # Header files includes = self._operator_includes(o) + [blankline] + # Namespaces + namespaces = [c.Statement('using namespace %s' % i) for i in o._namespaces] + if namespaces: + namespaces.append(blankline) + # Type declarations typedecls = self._operator_typedecls(o, mode) if mode in ('all', 'public') and o._compiler.src_ext in ('cpp', 'cu'): @@ -656,7 +692,7 @@ def visit_Operator(self, o, mode='all'): if globs: globs.append(blankline) - return c.Module(headers + includes + typedecls + globs + + return c.Module(headers + includes + namespaces + typedecls + globs + esigns + [blankline, kernel] + efuncs) @@ -745,6 +781,8 @@ def visit_Conditional(self, o, ret=None, queue=None): ret = self._visit(i, ret=ret, queue=queue) return ret + visit_Call = visit_Conditional + class MapKind(FindSections): @@ -764,11 +802,21 @@ def visit_dummy(self, o, ret=None, queue=None): visit_Conditional = FindSections.visit_Iteration visit_Block = FindSections.visit_Iteration + visit_Lambda = FindSections.visit_Iteration class MapExprStmts(MapKind): + visit_ExprStmt = MapKind.visit_dummy + def visit_Call(self, o, ret=None, queue=None): + if ret is None: + ret = self.default_retval() + ret[o] = as_tuple(queue) + for i in o.children: + ret = self._visit(i, ret=ret, queue=queue) + return ret + class MapHaloSpots(MapKind): visit_HaloSpot = MapKind.visit_dummy @@ -896,7 +944,7 @@ def _defines_aliases(n): if isinstance(i, IndexedBase)], 'writes': lambda n: as_tuple(n.writes), 'defines': lambda n: as_tuple(n.defines), - 'globals': lambda n: [f.indexed 
for f in n.functions if f._mem_constant], + 'globals': lambda n: [f.base for f in n.functions if f._mem_global], 'defines-aliases': _defines_aliases } @@ -1175,13 +1223,23 @@ def visit_Callable(self, o): return o._rebuild(body=body, parameters=parameters) def visit_Call(self, o): - arguments = [uxreplace(i, self.mapper) for i in o.arguments] + arguments = [] + for i in o.arguments: + if i in o.children: + arguments.append(self._visit(i)) + else: + arguments.append(uxreplace(i, self.mapper)) if o.retobj is not None: retobj = uxreplace(o.retobj, self.mapper) return o._rebuild(arguments=arguments, retobj=retobj) else: return o._rebuild(arguments=arguments) + def visit_Lambda(self, o): + body = self._visit(o.body) + parameters = [self.mapper.get(i, i) for i in o.parameters] + return o._rebuild(body=body, parameters=parameters) + def visit_Conditional(self, o): condition = uxreplace(o.condition, self.mapper) then_body = self._visit(o.then_body) @@ -1246,18 +1304,24 @@ class LambdaCollection(c.Collection): class MultilineCall(c.Generable): - def __init__(self, name, arguments, is_expr=False, is_indirect=False, cast=None): + def __init__(self, name, arguments, is_expr=False, is_indirect=False, + cast=None, templates=None): self.name = name self.arguments = as_tuple(arguments) self.is_expr = is_expr self.is_indirect = is_indirect self.cast = cast + self.templates = templates def generate(self): + if self.templates: + tip = "%s<%s>" % (self.name, ", ".join(str(i) for i in self.templates)) + else: + tip = self.name if not self.is_indirect: - tip = "%s(" % self.name + tip = "%s(" % tip else: - tip = "%s%s" % (self.name, ',' if self.arguments else '') + tip = "%s%s" % (tip, ',' if self.arguments else '') processed = [] for i in self.arguments: if isinstance(i, (MultilineCall, LambdaCollection)): diff --git a/devito/operator/operator.py b/devito/operator/operator.py index 7aff552a47..4d5208235a 100644 --- a/devito/operator/operator.py +++ b/devito/operator/operator.py @@ -136,6 
+136,7 @@ class Operator(Callable): _default_headers = [('_POSIX_C_SOURCE', '200809L')] _default_includes = ['stdlib.h', 'math.h', 'sys/time.h'] _default_globals = [] + _default_namespaces = [] def __new__(cls, expressions, **kwargs): if expressions is None: @@ -191,6 +192,8 @@ def _build(cls, expressions, **kwargs): op._includes = OrderedSet(*cls._default_includes) op._includes.update(profiler._default_includes) op._includes.update(byproduct.includes) + op._namespaces = OrderedSet(*cls._default_namespaces) + op._namespaces.update(byproduct.namespaces) # Required for the jit-compilation op._compiler = kwargs['compiler'] diff --git a/devito/operator/registry.py b/devito/operator/registry.py index 722b451a3a..04c1000866 100644 --- a/devito/operator/registry.py +++ b/devito/operator/registry.py @@ -26,7 +26,7 @@ class OperatorRegistry(OrderedDict, metaclass=Singleton): """ _modes = ('noop', 'advanced', 'advanced-fsg') - _languages = ('C', 'openmp', 'openacc', 'cuda', 'hip') + _languages = ('C', 'openmp', 'openacc', 'cuda', 'hip', 'sycl') _accepted = _modes + tuple(product(_modes, _languages)) def add(self, operator, platform, mode, language='C'): diff --git a/devito/passes/clusters/factorization.py b/devito/passes/clusters/factorization.py index 41c1ed2ec0..997b6d81fc 100644 --- a/devito/passes/clusters/factorization.py +++ b/devito/passes/clusters/factorization.py @@ -5,6 +5,7 @@ from devito.ir import cluster_pass from devito.symbolics import BasicWrapperMixin, estimate_cost, retrieve_symbols from devito.tools import ReducerMap +from devito.types.object import AbstractObject __all__ = ['factorize'] @@ -179,7 +180,9 @@ def _collect_nested(expr): return expr, {'funcs': expr} elif expr.is_Pow: return expr, {'pows': expr} - elif expr.is_Symbol or expr.is_Indexed or isinstance(expr, BasicWrapperMixin): + elif (expr.is_Symbol or + expr.is_Indexed or + isinstance(expr, (BasicWrapperMixin, AbstractObject))): return expr, {} elif expr.is_Add: return 
strategies['default'](expr), {} diff --git a/devito/passes/iet/definitions.py b/devito/passes/iet/definitions.py index 903cb5eb35..913432da8e 100644 --- a/devito/passes/iet/definitions.py +++ b/devito/passes/iet/definitions.py @@ -24,8 +24,8 @@ class MetaSite(object): - _items = ('allocs', 'objs', 'frees', 'pallocs', 'pfrees', - 'maps', 'unmaps', 'efuncs') + _items = ('standalones', 'allocs', 'stacks', 'objs', 'frees', 'pallocs', + 'pfrees', 'maps', 'unmaps', 'efuncs') def __init__(self): for i in self._items: @@ -90,7 +90,10 @@ def _alloc_object_on_low_lat_mem(self, site, obj, storage): frees = obj._C_free - storage.update(obj, site, objs=definition, frees=frees) + if obj.free_symbols - {obj}: + storage.update(obj, site, objs=definition, frees=frees) + else: + storage.update(obj, site, standalones=definition, frees=frees) def _alloc_array_on_low_lat_mem(self, site, obj, storage): """ @@ -98,7 +101,7 @@ def _alloc_array_on_low_lat_mem(self, site, obj, storage): """ alloc = Definition(obj) - storage.update(obj, site, allocs=alloc) + storage.update(obj, site, stacks=alloc) def _alloc_array_on_global_mem(self, site, obj, storage): """ @@ -198,13 +201,13 @@ def _alloc_mapped_array_on_high_bw_mem(self, site, obj, storage, *args): name = self.sregistry.make_name(prefix='alloc') body = (decl, *allocs, init, ret) efunc0 = make_callable(name, body, retval=obj) - assert len(efunc0.parameters) == 1 # `nbytes_param` - alloc = Call(name, nbytes_arg, retobj=obj) + args = list(efunc0.parameters) + args[args.index(nbytes_param)] = nbytes_arg + alloc = Call(name, args, retobj=obj) name = self.sregistry.make_name(prefix='free') efunc1 = make_callable(name, frees) - assert len(efunc1.parameters) == 1 # `obj` - free = Call(name, obj) + free = Call(name, efunc1.parameters) storage.update(obj, site, allocs=alloc, frees=free, efuncs=(efunc0, efunc1)) @@ -271,10 +274,12 @@ def _inject_definitions(self, iet, storage): cbody = k.body # objects + standalones = as_list(cbody.standalones) + 
flatten(v.standalones) objs = as_list(cbody.objs) + flatten(v.objs) # allocs/pallocs allocs = as_list(cbody.allocs) + flatten(v.allocs) + stacks = as_list(cbody.stacks) + flatten(v.stacks) for tid, body in as_mapper(v.pallocs, itemgetter(0), itemgetter(1)).items(): header = self.lang.Region._make_header(tid.symbolic_size) init = self.lang['thread-num'](retobj=tid) @@ -295,8 +300,10 @@ def _inject_definitions(self, iet, storage): # efuncs efuncs.extend(v.efuncs) - mapper[cbody] = cbody._rebuild(allocs=allocs, maps=maps, objs=objs, - unmaps=unmaps, frees=frees) + mapper[cbody] = cbody._rebuild( + standalones=standalones, allocs=allocs, stacks=stacks, + maps=maps, objs=objs, unmaps=unmaps, frees=frees + ) processed = Transformer(mapper, nested=True).visit(iet) diff --git a/devito/passes/iet/engine.py b/devito/passes/iet/engine.py index f74dfec110..c4e98e715c 100644 --- a/devito/passes/iet/engine.py +++ b/devito/passes/iet/engine.py @@ -46,6 +46,7 @@ def __init__(self, iet, options=None, sregistry=None, **kwargs): self.includes = [] self.headers = [] + self.namespaces = [] self.globals = [] # Stash immutable information useful for one or more compiler passes @@ -86,6 +87,7 @@ def apply(self, func, **kwargs): self.includes.extend(as_tuple(metadata.get('includes'))) self.headers.extend(as_tuple(metadata.get('headers'))) + self.namespaces.extend(as_tuple(metadata.get('namespaces'))) self.globals.extend(as_tuple(metadata.get('globals'))) # Update jit-compiler if necessary @@ -119,6 +121,7 @@ def apply(self, func, **kwargs): # Uniqueness self.includes = filter_ordered(self.includes) self.headers = filter_ordered(self.headers, key=str) + self.namespaces = filter_ordered(self.namespaces, key=str) self.globals = filter_ordered(self.globals) def visit(self, func, **kwargs): diff --git a/devito/passes/iet/langbase.py b/devito/passes/iet/langbase.py index 91e68fc02b..a8df344dbc 100644 --- a/devito/passes/iet/langbase.py +++ b/devito/passes/iet/langbase.py @@ -1,4 +1,5 @@ from 
functools import singledispatch +from itertools import takewhile from abc import ABC import cgen as c @@ -6,12 +7,12 @@ from devito.data import FULL from devito.ir import (DummyExpr, Call, Conditional, Expression, List, Prodder, ParallelIteration, ParallelBlock, PointerCast, EntryFunction, - AsyncCallable, FindNodes, FindSymbols) + AsyncCallable, FindNodes, FindSymbols, IsPerfectIteration) from devito.mpi.distributed import MPICommObject from devito.passes import is_on_device from devito.passes.iet.engine import iet_pass from devito.symbolics import Byref, CondNe, SizeOf -from devito.tools import as_list, prod +from devito.tools import as_list, is_integer, prod from devito.types import Symbol, QueueID, Wildcard __all__ = ['LangBB', 'LangTransformer'] @@ -160,7 +161,8 @@ def __init__(self, key, sregistry, platform, compiler): Parameters ---------- key : callable, optional - Return True if an Iteration can and should be parallelized, False otherwise. + Return True if an Iteration can and should be parallelized, + False otherwise. sregistry : SymbolRegistry The symbol registry, to access the symbols appearing in an IET. platform : Platform @@ -215,6 +217,169 @@ def Prodder(self): return self.lang.Prodder +class ShmTransformer(LangTransformer): + + """ + Abstract base class for LangTransformers that want to emit + shared-memory-parallel IETs for CPUs. + """ + + def __init__(self, key, sregistry, options, platform, compiler): + """ + Parameters + ---------- + key : callable, optional + Return True if an Iteration can and should be parallelized, + False otherwise. + sregistry : SymbolRegistry + The symbol registry, to access the symbols appearing in an IET. + options : dict + The optimization options. + Accepted: ['par-collapse-ncores', 'par-collapse-work', + 'par-chunk-nonaffine', 'par-dynamic-work', 'par-nested'] + * 'par-collapse-ncores': use a collapse clause if the number of + available physical cores is greater than this threshold. 
+ * 'par-collapse-work': use a collapse clause if the trip count of the + collapsable Iterations is statically known to exceed this threshold. + * 'par-chunk-nonaffine': coefficient to adjust the chunk size in + non-affine parallel Iterations. + * 'par-dynamic-work': use dynamic scheduling if the operation count per + iteration exceeds this threshold. Otherwise, use static scheduling. + * 'par-nested': nested parallelism if the number of hyperthreads + per core is greater than this threshold. + platform : Platform + The underlying platform. + compiler : Compiler + The underlying JIT compiler. + """ + super().__init__(key, sregistry, platform, compiler) + + self.collapse_ncores = options['par-collapse-ncores'] + self.collapse_work = options['par-collapse-work'] + self.chunk_nonaffine = options['par-chunk-nonaffine'] + self.dynamic_work = options['par-dynamic-work'] + self.nested = options['par-nested'] + + @property + def ncores(self): + return self.platform.cores_physical + + @property + def nhyperthreads(self): + return self.platform.threads_per_core + + @property + def nthreads(self): + return self.sregistry.nthreads + + @property + def nthreads_nested(self): + return self.sregistry.nthreads_nested + + @property + def nthreads_nonaffine(self): + return self.sregistry.nthreads_nonaffine + + @property + def threadid(self): + return self.sregistry.threadid + + def _score_candidate(self, n0, root, collapsable=()): + """ + The score of a collapsable nest depends on the number of fully-parallel + Iterations and their position in the nest (the outer, the better). 
+ """ + nest = [root] + list(collapsable) + n = len(nest) + + # Number of fully-parallel collapsable Iterations + key = lambda i: i.is_ParallelNoAtomic + fp_iters = list(takewhile(key, nest)) + n_fp_iters = len(fp_iters) + + # Number of parallel-if-atomic collapsable Iterations + key = lambda i: i.is_ParallelAtomic + pia_iters = list(takewhile(key, nest)) + n_pia_iters = len(pia_iters) + + # Prioritize the Dimensions that are more likely to define larger + # iteration spaces + key = lambda d: (not d.is_Derived or + (d.is_Custom and not is_integer(d.symbolic_size)) or + (d.is_Block and d._depth == 1)) + + fpdims = [i.dim for i in fp_iters] + n_fp_iters_large = len([d for d in fpdims if key(d)]) + + piadims = [i.dim for i in pia_iters] + n_pia_iters_large = len([d for d in piadims if key(d)]) + + return ( + int(n_fp_iters == n), # Fully-parallel nest + n_fp_iters_large, + n_fp_iters, + n_pia_iters_large, + n_pia_iters, + -(n0 + 1), # The outer, the better + n, + ) + + def _select_candidates(self, candidates): + assert candidates + + if self.ncores < self.collapse_ncores: + return candidates[0], [] + + mapper = {} + for n0, root in enumerate(candidates): + + # Score `root` in isolation + mapper[(root, ())] = self._score_candidate(n0, root) + + collapsable = [] + for n, i in enumerate(candidates[n0+1:], n0+1): + # The Iteration nest [root, ..., i] must be perfect + if not IsPerfectIteration(depth=i).visit(root): + break + + # Loops are collapsable only if none of the iteration variables + # appear in initializer expressions. For example, the following + # two loops cannot be collapsed + # + # for (i = ... ) + # for (j = i ...) + # ... + # + # Here, we make sure this won't happen + if any(j.dim in i.symbolic_min.free_symbols for j in candidates[n0:n]): + break + + # Can't collapse SIMD-vectorized Iterations + if i.is_Vectorized: + break + + # Would there be enough work per parallel iteration? 
+ nested = candidates[n+1:] + if nested: + try: + work = prod([int(j.dim.symbolic_size) for j in nested]) + if work < self.collapse_work: + break + except TypeError: + pass + + collapsable.append(i) + + # Score `root + collapsable` + v = tuple(collapsable) + mapper[(root, v)] = self._score_candidate(n0, root, v) + + # Retrieve the candidates with highest score + root, collapsable = max(mapper, key=mapper.get) + + return root, list(collapsable) + + class DeviceAwareMixin(object): @property diff --git a/devito/passes/iet/orchestration.py b/devito/passes/iet/orchestration.py index 39fd286f1a..49bc3563d7 100644 --- a/devito/passes/iet/orchestration.py +++ b/devito/passes/iet/orchestration.py @@ -195,9 +195,15 @@ def fetchupdate(layer, iet, sync_ops, lang, sregistry): @fetchupdate.register(HostLayer) def _(layer, iet, sync_ops, lang, sregistry): + try: + qid = sregistry.queue0 + except AttributeError: + qid = None + body = list(iet.body) try: - body.extend([lang._map_update_device(s.target, s.imask) for s in sync_ops]) + body.extend([lang._map_update_device(s.target, s.imask, qid=qid) + for s in sync_ops]) name = 'init_from_%s' % layer.suffix except NotImplementedError: name = 'init_to_%s' % layer.suffix diff --git a/devito/passes/iet/parpragma.py b/devito/passes/iet/parpragma.py index ef1cd38af2..b41b871b55 100644 --- a/devito/passes/iet/parpragma.py +++ b/devito/passes/iet/parpragma.py @@ -1,5 +1,3 @@ -from itertools import takewhile - import numpy as np import cgen as c from cached_property import cached_property @@ -13,7 +11,7 @@ retrieve_iteration_tree, IMask, VECTORIZED) from devito.passes.iet.engine import iet_pass from devito.passes.iet.langbase import (LangBB, LangTransformer, DeviceAwareMixin, - make_sections_from_imask) + ShmTransformer, make_sections_from_imask) from devito.symbolics import INT, ccode from devito.tools import as_tuple, flatten, is_integer, prod from devito.types import Symbol @@ -204,164 +202,16 @@ def collapsed(self): return tuple(ret) 
-class PragmaShmTransformer(PragmaSimdTransformer): +class PragmaShmTransformer(ShmTransformer, PragmaSimdTransformer): """ - Abstract base class for PragmaTransformers capable of emitting SIMD-parallel - and shared-memory-parallel IETs. + PragmaTransformer capable of emitting SIMD-parallel and shared-memory-parallel + IETs for CPUs. """ def __init__(self, sregistry, options, platform, compiler): - """ - Parameters - ---------- - sregistry : SymbolRegistry - The symbol registry, to access the symbols appearing in an IET. - options : dict - The optimization options. Accepted: ['par-collapse-ncores', - 'par-collapse-work', 'par-chunk-nonaffine', 'par-dynamic-work', 'par-nested'] - * 'par-collapse-ncores': use a collapse clause if the number of - available physical cores is greater than this threshold. - * 'par-collapse-work': use a collapse clause if the trip count of the - collapsable Iterations is statically known to exceed this threshold. - * 'par-chunk-nonaffine': coefficient to adjust the chunk size in - non-affine parallel Iterations. - * 'par-dynamic-work': use dynamic scheduling if the operation count per - iteration exceeds this threshold. Otherwise, use static scheduling. - * 'par-nested': nested parallelism if the number of hyperthreads per core - is greater than this threshold. - platform : Platform - The underlying platform. - compiler : Compiler - The underlying JIT compiler. 
- """ key = lambda i: i.is_ParallelRelaxed and not i.is_Vectorized - super().__init__(key, sregistry, platform, compiler) - - self.collapse_ncores = options['par-collapse-ncores'] - self.collapse_work = options['par-collapse-work'] - self.chunk_nonaffine = options['par-chunk-nonaffine'] - self.dynamic_work = options['par-dynamic-work'] - self.nested = options['par-nested'] - - @property - def ncores(self): - return self.platform.cores_physical - - @property - def nhyperthreads(self): - return self.platform.threads_per_core - - @property - def nthreads(self): - return self.sregistry.nthreads - - @property - def nthreads_nested(self): - return self.sregistry.nthreads_nested - - @property - def nthreads_nonaffine(self): - return self.sregistry.nthreads_nonaffine - - @property - def threadid(self): - return self.sregistry.threadid - - def _score_candidate(self, n0, root, collapsable=()): - """ - The score of a collapsable nest depends on the number of fully-parallel - Iterations and their position in the nest (the outer, the better). 
- """ - nest = [root] + list(collapsable) - n = len(nest) - - # Number of fully-parallel collapsable Iterations - key = lambda i: i.is_ParallelNoAtomic - fp_iters = list(takewhile(key, nest)) - n_fp_iters = len(fp_iters) - - # Number of parallel-if-atomic collapsable Iterations - key = lambda i: i.is_ParallelAtomic - pia_iters = list(takewhile(key, nest)) - n_pia_iters = len(pia_iters) - - # Prioritize the Dimensions that are more likely to define larger - # iteration spaces - key = lambda d: (not d.is_Derived or - (d.is_Custom and not is_integer(d.symbolic_size)) or - (d.is_Block and d._depth == 1)) - - fpdims = [i.dim for i in fp_iters] - n_fp_iters_large = len([d for d in fpdims if key(d)]) - - piadims = [i.dim for i in pia_iters] - n_pia_iters_large = len([d for d in piadims if key(d)]) - - return ( - int(n_fp_iters == n), # Fully-parallel nest - n_fp_iters_large, - n_fp_iters, - n_pia_iters_large, - n_pia_iters, - -(n0 + 1), # The outer, the better - n, - ) - - def _select_candidates(self, candidates): - assert candidates - - if self.ncores < self.collapse_ncores: - return candidates[0], [] - - mapper = {} - for n0, root in enumerate(candidates): - - # Score `root` in isolation - mapper[(root, ())] = self._score_candidate(n0, root) - - collapsable = [] - for n, i in enumerate(candidates[n0+1:], n0+1): - # The Iteration nest [root, ..., i] must be perfect - if not IsPerfectIteration(depth=i).visit(root): - break - - # Loops are collapsable only if none of the iteration variables - # appear in initializer expressions. For example, the following - # two loops cannot be collapsed - # - # for (i = ... ) - # for (j = i ...) - # ... - # - # Here, we make sure this won't happen - if any(j.dim in i.symbolic_min.free_symbols for j in candidates[n0:n]): - break - - # Can't collapse SIMD-vectorized Iterations - if i.is_Vectorized: - break - - # Would there be enough work per parallel iteration? 
- nested = candidates[n+1:] - if nested: - try: - work = prod([int(j.dim.symbolic_size) for j in nested]) - if work < self.collapse_work: - break - except TypeError: - pass - - collapsable.append(i) - - # Score `root + collapsable` - v = tuple(collapsable) - mapper[(root, v)] = self._score_candidate(n0, root, v) - - # Retrieve the candidates with highest score - root, collapsable = max(mapper, key=mapper.get) - - return root, list(collapsable) + super().__init__(key, sregistry, options, platform, compiler) def _make_reductions(self, partree): if not any(i.is_ParallelAtomic for i in partree.collapsed): diff --git a/devito/symbolics/extended_sympy.py b/devito/symbolics/extended_sympy.py index 8626ecf094..b8de8bade8 100644 --- a/devito/symbolics/extended_sympy.py +++ b/devito/symbolics/extended_sympy.py @@ -16,9 +16,10 @@ __all__ = ['CondEq', 'CondNe', 'IntDiv', 'CallFromPointer', # noqa 'CallFromComposite', 'FieldFromPointer', 'FieldFromComposite', 'ListInitializer', 'Byref', 'IndexedPointer', 'Cast', 'DefFunction', - 'InlineIf', 'Keyword', 'String', 'Macro', 'MacroArgument', - 'CustomType', 'Deref', 'INT', 'FLOAT', 'DOUBLE', 'VOID', 'Null', - 'SizeOf', 'rfunc', 'cast_mapper', 'BasicWrapperMixin'] + 'InlineIf', 'ReservedWord', 'Keyword', 'String', 'Macro', 'Class', + 'MacroArgument', 'CustomType', 'Deref', 'Namespace', 'Rvalue', + 'INT', 'FLOAT', 'DOUBLE', 'VOID', 'Null', 'SizeOf', 'rfunc', + 'cast_mapper', 'BasicWrapperMixin'] class CondEq(sympy.Eq): @@ -88,8 +89,7 @@ def __new__(cls, lhs, rhs, params=None): # Perhaps it's a symbolic RHS -- but we wanna be sure it's of type int if not hasattr(rhs, 'dtype'): raise ValueError("Symbolic RHS `%s` lacks dtype" % rhs) - if not issubclass(rhs.dtype, np.integer) or \ - not (rhs.is_Constant and issubclass(rhs.dtype, np.integer)): + if not issubclass(rhs.dtype, np.integer): raise ValueError("Symbolic RHS `%s` must be of type `int`, found " "`%s` instead" % (rhs, rhs.dtype)) rhs = sympify(rhs) @@ -517,6 +517,14 @@ class 
Macro(ReservedWord): pass +class Class(ReservedWord): + + def __str__(self): + return "class %s" % self.value + + __repr__ = __str__ + + class MacroArgument(sympy.Symbol): def __str__(self): @@ -534,8 +542,12 @@ class DefFunction(Function, Pickable): """ __rargs__ = ('name', 'arguments') + __rkwargs__ = ('template',) + + def __new__(cls, name, arguments=None, template=None, **kwargs): + if isinstance(name, str): + name = Keyword(name) - def __new__(cls, name, arguments=None, **kwargs): _arguments = [] for i in as_tuple(arguments): if isinstance(i, str): @@ -545,12 +557,25 @@ def __new__(cls, name, arguments=None, **kwargs): _arguments.append(ReservedWord(i)) else: _arguments.append(i) - arguments = tuple(_arguments) - if isinstance(name, str): - name = Keyword(name) - obj = Function.__new__(cls, name, Tuple(*arguments)) + + _template = [] + for i in as_tuple(template): + if isinstance(i, str): + # Same story as above + _template.append(ReservedWord(i)) + else: + _template.append(i) + + args = [name] + args.append(Tuple(*_arguments)) + if _template: + args.append(Tuple(*_template)) + + obj = Function.__new__(cls, *args) obj._name = name - obj._arguments = arguments + obj._arguments = tuple(_arguments) + obj._template = tuple(_template) + return obj @property @@ -561,8 +586,17 @@ def name(self): def arguments(self): return self._arguments + @property + def template(self): + return self._template + def __str__(self): - return "%s(%s)" % (self.name, ', '.join(str(i) for i in self.arguments)) + if self.template: + template = '<%s>' % ','.join(str(i) for i in self.template) + else: + template = '' + arguments = ', '.join(str(i) for i in self.arguments) + return "%s%s(%s)" % (self.name, template, arguments) __repr__ = __str__ @@ -617,6 +651,90 @@ def __str__(self): __reduce_ex__ = Pickable.__reduce_ex__ +class Namespace(sympy.Expr, Pickable): + + """ + Symbolic representation of a C++ namespace `ns0::ns1::...`. 
+ """ + + __rargs__ = ('items',) + + def __new__(cls, items, **kwargs): + normalized_items = [] + for i in as_tuple(items): + if isinstance(i, str): + normalized_items.append(ReservedWord(i)) + elif isinstance(i, ReservedWord): + normalized_items.append(i) + else: + raise ValueError("`items` must be iterable of str or ReservedWord") + + obj = sympy.Expr.__new__(cls) + obj._items = tuple(normalized_items) + + return obj + + def _hashable_content(self): + return super()._hashable_content() + self.items + + @property + def items(self): + return self._items + + def __str__(self): + return "::".join(str(i) for i in self.items) + + __repr__ = __str__ + + +class Rvalue(sympy.Expr, Pickable): + + """ + A generic C++ rvalue, that is a value that occupies a temporary location in + memory. + """ + + __rargs__ = ('expr',) + __rkwargs__ = ('namespace', 'init') + + def __new__(cls, expr, namespace=None, init=None): + args = [expr] + if namespace is not None: + args.append(namespace) + if init is not None: + args.append(init) + + obj = sympy.Expr.__new__(cls, *args) + + obj._expr = expr + obj._namespace = namespace + obj._init = init + + return obj + + @property + def expr(self): + return self._expr + + @property + def namespace(self): + return self._namespace + + @property + def init(self): + return self._init + + def __str__(self): + rvalue = str(self.expr) + if self.namespace: + rvalue = "%s::%s" % (self.namespace, rvalue) + if self.init: + rvalue = "%s%s" % (rvalue, self.init) + return rvalue + + __repr__ = __str__ + + # *** Casting class CastStar(object): diff --git a/devito/symbolics/printer.py b/devito/symbolics/printer.py index 17c04d8a99..6924ffffbd 100644 --- a/devito/symbolics/printer.py +++ b/devito/symbolics/printer.py @@ -235,11 +235,17 @@ def _print_TrigonometricFunction(self, expr): def _print_DefFunction(self, expr): arguments = [self._print(i) for i in expr.arguments] - return "%s(%s)" % (expr.name, ','.join(arguments)) + if expr.template: + template = '<%s>' %
','.join([str(i) for i in expr.template]) + else: + template = '' + return "%s%s(%s)" % (expr.name, template, ','.join(arguments)) def _print_Fallback(self, expr): return expr.__str__() + _print_Namespace = _print_Fallback + _print_Rvalue = _print_Fallback _print_MacroArgument = _print_Fallback _print_IndexedBase = _print_Fallback _print_IndexSum = _print_Fallback diff --git a/devito/tools/dtypes_lowering.py b/devito/tools/dtypes_lowering.py index 62776eefd5..70a378dae4 100644 --- a/devito/tools/dtypes_lowering.py +++ b/devito/tools/dtypes_lowering.py @@ -7,11 +7,13 @@ import numpy as np from cgen import dtype_to_ctype as cgen_dtype_to_ctype +from .utils import as_tuple + __all__ = ['int2', 'int3', 'int4', 'float2', 'float3', 'float4', 'double2', # noqa 'double3', 'double4', 'dtypes_vector_mapper', 'dtype_to_mpidtype', 'dtype_to_cstr', 'dtype_to_ctype', 'dtype_to_mpitype', 'dtype_len', 'ctypes_to_cstr', 'c_restrict_void_p', 'ctypes_vector_mapper', - 'is_external_ctype', 'infer_dtype'] + 'is_external_ctype', 'infer_dtype', 'CustomDtype'] # *** Custom np.dtypes @@ -93,6 +95,34 @@ def get_base_dtype(self, v, default=None): dtypes_vector_mapper.update({(v, 1): v for v in mapper.values()}) +# *** Custom types escaping both the numpy and ctypes namespaces + + +class CustomDtype(object): + + def __init__(self, name, template=None, modifier=None): + self.name = name + self.template = as_tuple(template) + self.modifier = modifier or '' + + def __eq__(self, other): + return (isinstance(other, CustomDtype) and + self.name == other.name and + self.template == other.template and + self.modifier == other.modifier) + + def __hash__(self): + return hash((self.name, self.template, self.modifier)) + + def __repr__(self): + template = '<%s>' % ','.join([str(i) for i in self.template]) + return "%s%s%s" % (self.name, + template if self.template else '', + self.modifier) + + __str__ = __repr__ + + # *** np.dtypes lowering @@ -180,6 +210,8 @@ def ctypes_to_cstr(ctype, toarray=None): 
"""Translate ctypes types into C strings.""" if ctype in ctypes_vector_mapper.values(): retval = ctype.__name__ + elif isinstance(ctype, CustomDtype): + retval = str(ctype) elif issubclass(ctype, ctypes.Structure): retval = 'struct %s' % ctype.__name__ elif issubclass(ctype, ctypes.Union): diff --git a/devito/types/basic.py b/devito/types/basic.py index 53e8a87189..17835933e3 100644 --- a/devito/types/basic.py +++ b/devito/types/basic.py @@ -39,7 +39,8 @@ class CodeSymbol(object): * "liveness": `_mem_external`, `_mem_internal_eager`, `_mem_internal_lazy` * "space": `_mem_local`, `_mem_mapped`, `_mem_host` - * "scope": `_mem_stack`, `_mem_heap`, `_mem_constant`, `_mem_shared` + * "scope": `_mem_stack`, `_mem_heap`, `_mem_global`, `_mem_shared`, + `_mem_constant` For example, an object that is `<_mem_internal_lazy, _mem_local, _mem_heap>` is allocated within the Operator entry point, on either the host or device @@ -174,29 +175,36 @@ def _mem_host(self): @property def _mem_stack(self): """ - True if the associated data should be allocated on the stack, False otherwise. + True if the associated data is allocated on the stack, False otherwise. """ return False @property def _mem_heap(self): """ - True if the associated data gets allocated on the heap, False otherwise. + True if the associated data is allocated on the heap, False otherwise. """ return False + @property + def _mem_global(self): + """ + True if the symbol is globally scoped, False otherwise. + """ + return self._mem_constant + @property def _mem_constant(self): """ - True if the associated data gets allocated in global constant memory, - False otherwise. + True if the associated data is allocated in global constant memory, + False otherwise. This is a special case of `_mem_global`. """ return False @property def _mem_shared(self): """ - True if the associated data gets allocated in so called shared memory, + True if the associated data is allocated in so called shared memory, False otherwise. 
""" return False diff --git a/devito/types/object.py b/devito/types/object.py index 869e794481..3768dc76fd 100644 --- a/devito/types/object.py +++ b/devito/types/object.py @@ -168,26 +168,50 @@ class LocalObject(AbstractObject): LocalObjects encode their dtype as a class attribute. """ + default_initvalue = None + """ + The initial value may or may not be a class-level attribute. In the latter + case, it is passed to the constructor. + """ + __rargs__ = ('name',) - __rkwargs__ = ('cargs', 'liveness') + __rkwargs__ = ('cargs', 'initvalue', 'liveness', 'is_global') - def __init__(self, name, cargs=None, **kwargs): + def __init__(self, name, cargs=None, initvalue=None, liveness='lazy', + is_global=False, **kwargs): self.name = name self.cargs = as_tuple(cargs) + self.initvalue = initvalue or self.default_initvalue - self._liveness = kwargs.get('liveness', 'lazy') - assert self._liveness in ['eager', 'lazy'] + assert liveness in ['eager', 'lazy'] + self._liveness = liveness + + self._is_global = is_global def _hashable_content(self): - return super()._hashable_content() + self.cargs + (self.liveness,) + return (super()._hashable_content() + + self.cargs + + (self.initvalue, self.liveness, self.is_global)) @property def liveness(self): return self._liveness + @property + def is_global(self): + return self._is_global + @property def free_symbols(self): - return super().free_symbols | set(self.cargs) + ret = set() + ret.update(super().free_symbols) + for i in self.cargs: + try: + ret.update(i.free_symbols) + except AttributeError: + # E.g., pure integers + pass + return ret @property def _C_init(self): @@ -211,6 +235,13 @@ def _C_free(self): """ return None + _C_modifier = None + """ + A modifier added to the LocalObject's C declaration when the object appears + in a function signature. For example, a subclass might define `_C_modifier = '&'` + to impose pass-by-reference semantics. 
+ """ + @property def _mem_internal_eager(self): return self._liveness == 'eager' @@ -218,3 +249,7 @@ def _mem_internal_eager(self): @property def _mem_internal_lazy(self): return self._liveness == 'lazy' + + @property + def _mem_global(self): + return self._is_global diff --git a/docker/Dockerfile.amd b/docker/Dockerfile.amd index bdcc1c5a26..6a104d2872 100644 --- a/docker/Dockerfile.amd +++ b/docker/Dockerfile.amd @@ -4,7 +4,6 @@ ############################################################## ARG ROCM_VERSION=5.5.1 -ARG arch="aomp" FROM rocm/dev-ubuntu-22.04:${ROCM_VERSION}-complete as sdk-base diff --git a/docker/Dockerfile.cpu b/docker/Dockerfile.cpu index 238ab4ff5e..bed0bbad24 100644 --- a/docker/Dockerfile.cpu +++ b/docker/Dockerfile.cpu @@ -1,9 +1,9 @@ # syntax=docker/dockerfile:1 ############################################################## -# This Dockerfile contains the Devito codes and can be built using different base images. +# This Dockerfile builds a base image to run Devito on generic CPU +# architectures using GCC compilers and OpenMPI. 
############################################################## -ARG arch=gcc ARG OMPI_BRANCH="v4.1.4" # Base image @@ -56,87 +56,3 @@ ENV LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/opt/openmpi/lib # Env vars defaults ENV DEVITO_ARCH="gcc" ENV DEVITO_LANGUAGE="openmp" - -############################################################## -# Intel Oneapi base -############################################################## -FROM base as oneapi - -# Download the key to system keyring -# https://www.intel.com/content/www/us/en/develop/documentation/installation-guide-for-intel-oneapi-toolkits-linux/top/installation/install-using-package-managers/apt.html#apt -RUN wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB | gpg --dearmor > /usr/share/keyrings/oneapi-archive-keyring.gpg -RUN echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" > /etc/apt/sources.list.d/oneAPI.list - -# Intel advisor and drivers -RUN apt-get update -y && \ - # advisor - apt-get install -y intel-oneapi-advisor - -# Drivers mandatory for intel gpu -# https://dgpu-docs.intel.com/installation-guides/ubuntu/ubuntu-focal.html#ubuntu-20-04-focal -RUN wget -qO - https://repositories.intel.com/graphics/intel-graphics.key | gpg --dearmor > /usr/share/keyrings/intel-graphics.gpg -RUN echo "deb [arch=amd64 signed-by=/usr/share/keyrings/intel-graphics.gpg] https://repositories.intel.com/graphics/ubuntu focal main" > /etc/apt/sources.list.d/intel.list - -RUN apt-get update -y && apt-get dist-upgrade -y && \ - apt-get install -y intel-opencl-icd intel-level-zero-gpu level-zero level-zero-dev \ - intel-media-va-driver-non-free libmfx1 libmfxgen1 libvpl2 \ - libigc-dev intel-igc-cm libigdfcl-dev libigfxcmrt-dev level-zero-dev - -############################################################## -# ICC image -# This is a legacy setup that is not built anymore but kept for reference 
-############################################################## -FROM oneapi as icc - -RUN apt-get update -y && apt-get install -y intel-oneapi-compiler-dpcpp-cpp-and-cpp-classic intel-oneapi-mpi-devel && \ - apt-get clean && apt-get autoclean && apt-get autoremove -y && \ - rm -rf /var/lib/apt/lists/* - -# Devito config -ENV DEVITO_ARCH="icc" -ENV DEVITO_LANGUAGE="openmp" -# MPICC compiler for mpi4py -ENV MPICC=mpiicc -ENV MPI4PY_FLAGS='. /opt/intel/oneapi/setvars.sh && CFLAGS="-cc=icc"' - -############################################################## -# ICX image -############################################################## -FROM oneapi as icx - -RUN apt-get update -y && apt-get install -y intel-oneapi-compiler-dpcpp-cpp intel-oneapi-mpi-devel && \ - apt-get clean && apt-get autoclean && apt-get autoremove -y && \ - rm -rf /var/lib/apt/lists/* - -# Devito config -ENV DEVITO_ARCH="icx" -ENV DEVITO_LANGUAGE="openmp" -# MPICC compiler for mpi4py -ENV MPICC=mpiicc -ENV MPI4PY_FLAGS='. /opt/intel/oneapi/setvars.sh && CFLAGS="-cc=icx"' - -############################################################## -# ICX hpc image -############################################################## -FROM oneapi as icx-hpc - -# Install both icc and icx to avoid missing dependencies -RUN apt-get update -y && \ - apt-get install -y intel-oneapi-compiler-dpcpp-cpp intel-oneapi-mpi-devel && \ - apt-get install -y intel-oneapi-compiler-dpcpp-cpp-and-cpp-classic - -# Missig components -# https://www.intel.com/content/www/us/en/developer/tools/oneapi/hpc-toolkit-download.html?operatingsystem=linux&distributions=aptpackagemanager -RUN curl -f "https://registrationcenter-download.intel.com/akdlm/IRC_NAS/ebf5d9aa-17a7-46a4-b5df-ace004227c0e/l_dpcpp-cpp-compiler_p_2023.2.1.8.sh" -O && \ - chmod +x l_dpcpp-cpp-compiler_p_2023.2.1.8.sh && ./l_dpcpp-cpp-compiler_p_2023.2.1.8.sh -a -s --eula accept && \ - rm l_dpcpp-cpp-compiler_p_2023.2.1.8.sh - -RUN apt-get clean && apt-get autoclean && apt-get 
autoremove -y && \ - rm -rf /var/lib/apt/lists/* - -# Devito config -ENV DEVITO_ARCH="icx" -ENV DEVITO_LANGUAGE="openmp" -# MPICC compiler for mpi4py -ENV MPICC=mpiicc -ENV MPI4PY_FLAGS='. /opt/intel/oneapi/setvars.sh && CFLAGS="-cc=icx"' \ No newline at end of file diff --git a/docker/Dockerfile.devito b/docker/Dockerfile.devito index 703964e45e..aeda36d615 100644 --- a/docker/Dockerfile.devito +++ b/docker/Dockerfile.devito @@ -1,5 +1,5 @@ ############################################################## -# This Dockerfile contains the Devito codes and can be built using different base images. +# This Dockerfile contains Devito and can be built using different base images. ############################################################## # Base image with compilers diff --git a/docker/Dockerfile.intel b/docker/Dockerfile.intel new file mode 100644 index 0000000000..48757d9776 --- /dev/null +++ b/docker/Dockerfile.intel @@ -0,0 +1,114 @@ +# syntax=docker/dockerfile:1 +############################################################## +# This Dockerfile contains the Intel OneAPI toolkit for Devito +############################################################## + +# Base image +FROM ubuntu:22.04 as base + +ENV DEBIAN_FRONTEND noninteractive + +# Install python +RUN apt-get update && \ + apt-get install -y dh-autoreconf python3-venv python3-dev python3-pip + +# Install for basic base not containing it +RUN apt-get install -y vim wget git flex libnuma-dev tmux \ + numactl hwloc curl \ + autoconf libtool build-essential procps + +# Install tmpi +RUN curl https://raw.githubusercontent.com/Azrael3000/tmpi/master/tmpi -o /usr/local/bin/tmpi + +# Install OpenGL library, necessary for the installation of GemPy +RUN apt-get install -y libgl1-mesa-glx + +RUN apt-get clean && apt-get autoclean && apt-get autoremove -y && \ + rm -rf /var/lib/apt/lists/* + +EXPOSE 8888 +CMD ["/bin/bash"] + +############################################################## +# Intel OneAPI standard image 
+############################################################## +FROM base as oneapi + +# Download the key to system keyring +# https://www.intel.com/content/www/us/en/develop/documentation/installation-guide-for-intel-oneapi-toolkits-linux/top/installation/install-using-package-managers/apt.html#apt +RUN wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB | gpg --dearmor > /usr/share/keyrings/oneapi-archive-keyring.gpg +RUN echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" > /etc/apt/sources.list.d/oneAPI.list + +# Intel advisor and drivers +RUN apt-get update -y && \ + # advisor + apt-get install -y intel-oneapi-advisor + +# Drivers mandatory for intel gpu +# https://dgpu-docs.intel.com/driver/installation.html#ubuntu-install-steps +RUN wget -qO - https://repositories.intel.com/graphics/intel-graphics.key | gpg --dearmor > /usr/share/keyrings/intel-graphics.gpg +RUN echo "deb [arch=amd64 signed-by=/usr/share/keyrings/intel-graphics.gpg] https://repositories.intel.com/graphics/ubuntu jammy unified" > /etc/apt/sources.list.d/intel-gpu-jammy.list + +RUN apt-get update -y && apt-get dist-upgrade -y && \ + # Compute and media runtime + apt-get install -y intel-opencl-icd intel-level-zero-gpu level-zero \ + intel-media-va-driver-non-free libmfx1 libmfxgen1 libvpl2 \ + libegl-mesa0 libegl1-mesa libegl1-mesa-dev libgbm1 libgl1-mesa-dev libgl1-mesa-dri \ + libglapi-mesa libgles2-mesa-dev libglx-mesa0 libigdgmm12 libxatracker2 mesa-va-drivers \ + mesa-vdpau-drivers mesa-vulkan-drivers va-driver-all vainfo hwinfo clinfo \ + # Development packages + libigc-dev intel-igc-cm libigdfcl-dev libigfxcmrt-dev level-zero-dev + +############################################################## +# ICC image +# This is a legacy setup that is not built anymore but kept for reference +############################################################## +FROM oneapi as icc + +RUN apt-get update -y && 
apt-get install -y intel-oneapi-compiler-dpcpp-cpp-and-cpp-classic intel-oneapi-mpi-devel && \ + apt-get clean && apt-get autoclean && apt-get autoremove -y && \ + rm -rf /var/lib/apt/lists/* + +# Devito config +ENV DEVITO_ARCH="icc" +ENV DEVITO_LANGUAGE="openmp" +ENV DEVITO_PLATFORM="intel64" +# MPICC compiler for mpi4py +ENV MPICC=mpiicc +ENV MPI4PY_FLAGS='. /opt/intel/oneapi/setvars.sh && CFLAGS="-cc=icc"' + +############################################################## +# ICX OpenMP image +############################################################## +FROM oneapi as icx + +RUN apt-get update -y && apt-get install -y intel-oneapi-compiler-dpcpp-cpp intel-oneapi-mpi-devel && \ + apt-get clean && apt-get autoclean && apt-get autoremove -y && \ + rm -rf /var/lib/apt/lists/* + +# Devito config +ENV DEVITO_ARCH="icx" +ENV DEVITO_LANGUAGE="openmp" +# MPICC compiler for mpi4py +ENV MPICC=mpiicc +ENV MPI4PY_FLAGS='. /opt/intel/oneapi/setvars.sh && CFLAGS="-cc=icx"' + +############################################################## +# ICX SYCL CPU image +############################################################## +FROM icx as cpu-sycl + +# Devito config +ENV DEVITO_ARCH="sycl" +ENV DEVITO_LANGUAGE="sycl" +ENV DEVITO_PLATFORM="intel64" + +############################################################## +# ICX SYCL GPU image +############################################################## +FROM icx as gpu-sycl + +# Devito config +ENV DEVITO_ARCH="sycl" +ENV DEVITO_LANGUAGE="sycl" +ENV DEVITO_PLATFORM="intelgpuX" diff --git a/tests/test_iet.py b/tests/test_iet.py index d148dc1539..6ee6c13ca7 100644 --- a/tests/test_iet.py +++ b/tests/test_iet.py @@ -2,18 +2,21 @@ from ctypes import c_void_p import cgen +import numpy as np import sympy from devito import (Eq, Grid, Function, TimeFunction, Operator, Dimension, # noqa switchconfig) from devito.ir.iet import (Call, Callable, Conditional, DummyExpr, Iteration, List, KernelLaunch, Lambda, ElementalFunction, CGen, FindSymbols, - 
filter_iterations, make_efunc, retrieve_iteration_tree) + filter_iterations, make_efunc, retrieve_iteration_tree, + Transformer) from devito.ir import SymbolRegistry from devito.passes.iet.engine import Graph from devito.passes.iet.languages.C import CDataManager -from devito.symbolics import Byref, FieldFromComposite, InlineIf, Macro -from devito.tools import as_tuple +from devito.symbolics import (Byref, FieldFromComposite, InlineIf, Macro, Class, + FLOAT) +from devito.tools import CustomDtype, as_tuple, dtype_to_ctype from devito.types import Array, LocalObject, Symbol @@ -143,7 +146,7 @@ def test_list_denesting(): def test_make_cpp_parfor(): """ - Test construction of a CPP parallel for. This excites the IET construction + Test construction of a C++ parallel for. This excites the IET construction machinery in several ways, in particular by using Lambda nodes (to generate C++ lambda functions) and nested Calls. """ @@ -273,6 +276,59 @@ def _C_free(self): }""" +def test_cpp_local_object(): + """ + Test C++ support for LocalObjects. 
+ """ + + class MyObject(LocalObject): + dtype = CustomDtype('dummy') + + # Locally-scoped objects are declared in the function body + lo0 = MyObject('obj0') + + # Globally-scoped objects must not be declared in the function body + lo1 = MyObject('obj1', is_global=True) + + # A LocalObject using both a template and a modifier + class SpecialObject(LocalObject): + dtype = CustomDtype('bar', template=('int', 'float'), modifier='&') + + lo2 = SpecialObject('obj2') + + # A LocalObject instantiated and subsequently assigned a value + lo3 = MyObject('obj3', initvalue=Macro('meh')) + + # A LocalObject instantiated calling its 2-args constructor and subsequently + # assigned a value + lo4 = MyObject('obj4', cargs=(1, 2), initvalue=Macro('meh')) + + # A LocalObject with generic sympy exprs used as constructor args + expr = sympy.Function('ceil')(FLOAT(Symbol(name='s'))**-1) + lo5 = MyObject('obj5', cargs=(expr,), initvalue=Macro('meh')) + + # A LocalObject with class-level initvalue and numeric dtype + class SpecialObject2(LocalObject): + dtype = dtype_to_ctype(np.float32) + default_initvalue = Macro('meh') + + lo6 = SpecialObject2('obj6') + + iet = Call('foo', [lo0, lo1, lo2, lo3, lo4, lo5, lo6]) + iet = ElementalFunction('foo', iet, parameters=()) + + dm = CDataManager(sregistry=None) + iet = CDataManager.place_definitions.__wrapped__(dm, iet)[0] + + assert 'dummy obj0;' in str(iet) + assert 'dummy obj1;' not in str(iet) + assert 'bar& obj2;' in str(iet) + assert 'dummy obj3 = meh;' in str(iet) + assert 'dummy obj4(1,2) = meh;' in str(iet) + assert 'dummy obj5(ceil(1.0F/(float)s)) = meh;' in str(iet) + assert 'float obj6 = meh;' in str(iet) + + def test_call_indexed(): grid = Grid(shape=(10, 10)) @@ -302,6 +358,28 @@ def test_call_retobj_indexed(): assert not call.defines +def test_call_lambda_transform(): + grid = Grid(shape=(10, 10)) + x, y = grid.dimensions + + u = Function(name='u', grid=grid) + + e0 = DummyExpr(x, 1) + e1 = DummyExpr(y, 1) + + body = List(body=[e0, 
e1]) + call = Call('foo', [u, Lambda(body)]) + + subs = {e0: DummyExpr(x, 2), e1: DummyExpr(y, 2)} + + assert str(Transformer(subs).visit(call)) == """\ +foo(u_vec,[]() +{ + x = 2; + y = 2; +});""" + + def test_null_init(): grid = Grid(shape=(10, 10)) @@ -313,7 +391,7 @@ def test_null_init(): assert expr.defines == (u.indexed,) -def test_templates(): +def test_templates_callable(): grid = Grid(shape=(10, 10)) x, y = grid.dimensions @@ -330,6 +408,17 @@ def test_templates(): }""" +def test_templates_call(): + grid = Grid(shape=(10, 10)) + x, y = grid.dimensions + + u = Function(name='u', grid=grid) + + foo = Call('foo', u, templates=[Class('a'), Class('b')]) + + assert str(foo) == "foo(u_vec);" + + def test_kernel_launch(): grid = Grid(shape=(10, 10)) diff --git a/tests/test_symbolics.py b/tests/test_symbolics.py index fcc7395b05..3d3500c98e 100644 --- a/tests/test_symbolics.py +++ b/tests/test_symbolics.py @@ -10,7 +10,8 @@ from devito.ir import Expression, FindNodes from devito.symbolics import (retrieve_functions, retrieve_indexed, evalrel, # noqa CallFromPointer, Cast, DefFunction, FieldFromPointer, - INT, FieldFromComposite, IntDiv, ccode, uxreplace, + INT, FieldFromComposite, IntDiv, Namespace, Rvalue, + ReservedWord, ListInitializer, ccode, uxreplace, retrieve_derivatives) from devito.tools import as_tuple from devito.types import (Array, Bundle, FIndexed, LocalObject, Object, @@ -287,6 +288,52 @@ def test_intdiv(): assert ccode(v) == 'b*((a + b) / 2) + 3' +def test_def_function(): + foo0 = DefFunction('foo', arguments=['a', 'b'], template=['int']) + foo1 = DefFunction('foo', arguments=['a', 'b'], template=['int']) + foo2 = DefFunction('foo', arguments=['a', 'b']) + foo3 = DefFunction('foo', arguments=['a']) + + # Code generation + assert str(foo0) == 'foo(a, b)' + assert str(foo3) == 'foo(a)' + + # Hashing and equality + assert hash(foo0) == hash(foo1) + assert foo0 == foo1 + assert hash(foo0) != hash(foo2) + assert hash(foo2) != hash(foo3) + + # 
Reconstruction + assert foo0 == foo0._rebuild() + assert str(foo0._rebuild('bar', template=['float'])) == 'bar(a, b)' + + +def test_namespace(): + ns0 = Namespace(['std', 'algorithms', 'parallel']) + assert str(ns0) == 'std::algorithms::parallel' + + ns1 = Namespace(['std']) + ns2 = Namespace(['std', 'algorithms', 'parallel']) + + # Test hashing and equality + assert hash(ns0) != hash(ns1) # Same reason as above + assert ns0 != ns1 + assert hash(ns0) == hash(ns2) + assert ns0 == ns2 + + # Free symbols + assert not ns0.free_symbols + + +def test_rvalue(): + ctype = ReservedWord('dummytype') + ns = Namespace(['my', 'namespace']) + init = ListInitializer(()) + + assert str(Rvalue(ctype, ns, init)) == 'my::namespace::dummytype{}' + + def test_cast(): s = Symbol(name='s', dtype=np.float32) diff --git a/tests/test_unexpansion.py b/tests/test_unexpansion.py index 8a4dcbbfed..97b855326d 100644 --- a/tests/test_unexpansion.py +++ b/tests/test_unexpansion.py @@ -4,6 +4,7 @@ from conftest import assert_structure, get_params, get_arrays, check_array from devito import (Buffer, Eq, Function, TimeFunction, Grid, Operator, Substitutions, Coefficient, cos, sin) +from devito.finite_differences import Weights from devito.arch.compiler import OneapiCompiler from devito.ir import Expression, FindNodes, FindSymbols from devito.parameters import switchconfig, configuration @@ -91,7 +92,9 @@ def test_multiple_cross_derivs(self, coeffs, expected): op.cfunction # w0, w1, ... - assert len(op._globals) == expected + functions = FindSymbols().visit(op) + weights = [f for f in functions if isinstance(f, Weights)] + assert len(weights) == expected class Test1Pass(object):