diff --git a/.github/workflows/docker-bases.yml b/.github/workflows/docker-bases.yml
index 96846320fc..585ea16911 100644
--- a/.github/workflows/docker-bases.yml
+++ b/.github/workflows/docker-bases.yml
@@ -10,6 +10,7 @@ on:
       - '/docker/Dockerfile.nvidia'
       - '/docker/Dockerfile.cpu'
       - '/docker/Dockerfile.amd'
+      - '/docker/Dockerfile.intel'
   workflow_dispatch:
     inputs:
       tags:
@@ -95,12 +96,32 @@ jobs:
         uses: docker/build-push-action@v3
         with:
           context: .
-          file: './docker/Dockerfile.cpu'
+          file: './docker/Dockerfile.intel'
           push: true
           target: 'icx'
           build-args: 'arch=icx'
           tags: 'devitocodes/bases:cpu-icx'
 
+      - name: SYCL CPU image
+        uses: docker/build-push-action@v3
+        with:
+          context: .
+          file: './docker/Dockerfile.intel'
+          push: true
+          target: 'cpu-sycl'
+          build-args: 'arch=cpu-sycl'
+          tags: 'devitocodes/bases:cpu-sycl'
+
+      - name: SYCL GPU image
+        uses: docker/build-push-action@v3
+        with:
+          context: .
+          file: './docker/Dockerfile.intel'
+          push: true
+          target: 'gpu-sycl'
+          build-args: 'arch=gpu-sycl'
+          tags: 'devitocodes/bases:gpu-sycl'
+
 #######################################################
 ################### Nvidia nvhpc ######################
 #######################################################
diff --git a/devito/arch/archinfo.py b/devito/arch/archinfo.py
index cccef94712..7563359669 100644
--- a/devito/arch/archinfo.py
+++ b/devito/arch/archinfo.py
@@ -29,7 +29,7 @@
            # Generic GPUs
            'AMDGPUX', 'NVIDIAX', 'INTELGPUX',
            # Intel GPUs
-           'PVC']
+           'PVC', 'INTELGPUMAX', 'MAX1100', 'MAX1550']
 
 
 @memoized_func
@@ -848,10 +848,14 @@ def march(cls):
 
 # Devices
 NVIDIAX = NvidiaDevice('nvidiaX')
+
 AMDGPUX = AmdDevice('amdgpuX')
-INTELGPUX = IntelDevice('intelgpuX')
 
-PVC = IntelDevice('pvc', max_threads_per_block=4096)  # Intel Ponte Vecchio GPU
+INTELGPUX = IntelDevice('intelgpuX')
+PVC = IntelDevice('pvc', max_threads_per_block=4096)  # Legacy codename for MAX GPUs
+INTELGPUMAX = IntelDevice('intelgpuMAX', max_threads_per_block=4096)
+MAX1100 = IntelDevice('max1100', max_threads_per_block=4096)
+MAX1550 = IntelDevice('max1550', max_threads_per_block=4096)
 
 platform_registry = Platform.registry
 platform_registry['cpu64'] = get_platform  # Autodetection
diff --git a/devito/arch/compiler.py b/devito/arch/compiler.py
index 08f509c5fe..3b102f1822 100644
--- a/devito/arch/compiler.py
+++ b/devito/arch/compiler.py
@@ -2,7 +2,8 @@
 from hashlib import sha1
 from os import environ, path, makedirs
 from packaging.version import Version
-from subprocess import DEVNULL, PIPE, CalledProcessError, check_output, check_call, run
+from subprocess import (DEVNULL, PIPE, CalledProcessError, check_output,
+                        check_call, run)
 import platform
 import warnings
 import sys
@@ -10,10 +11,11 @@
 
 import numpy.ctypeslib as npct
 from codepy.jit import compile_from_string
-from codepy.toolchain import GCCToolchain, call_capture_output as _call_capture_output
+from codepy.toolchain import (GCCToolchain,
+                              call_capture_output as _call_capture_output)
 
 from devito.arch import (AMDGPUX, Cpu64, M1, NVIDIAX, POWER8, POWER9, GRAVITON,
-                         INTELGPUX, PVC, get_nvidia_cc, check_cuda_runtime,
+                         IntelDevice, get_nvidia_cc, check_cuda_runtime,
                          get_m1_llvm_path)
 from devito.exceptions import CompilationError
 from devito.logger import debug, warning, error
@@ -716,7 +718,6 @@ def __lookup_cmds__(self):
 class IntelCompiler(Compiler):
 
     def __init_finalize__(self, **kwargs):
-
         platform = kwargs.pop('platform', configuration['platform'])
         language = kwargs.pop('language', configuration['language'])
 
@@ -734,13 +735,20 @@ def __init_finalize__(self, **kwargs):
         if language == 'openmp':
             self.ldflags.append('-qopenmp')
 
-        # Make sure the MPI compiler uses `icc` underneath -- whatever the MPI distro is
         if kwargs.get('mpi'):
-            mpi_distro = sniff_mpi_distro('mpiexec')
-            if mpi_distro != 'IntelMPI':
-                warning("Expected Intel MPI distribution with `%s`, but found `%s`"
-                        % (self.__class__.__name__, mpi_distro))
-            self.cflags.insert(0, '-cc=%s' % self.CC)
+            self.__init_intel_mpi__()
+            self.__init_intel_mpi_flags__()
+
+    def __init_intel_mpi__(self, **kwargs):
+        # Warn (without failing) when the MPI distribution reported by
+        # `sniff_mpi_distro('mpiexec')` is not the expected Intel MPI
+        mpi_distro = sniff_mpi_distro('mpiexec')
+        if mpi_distro != 'IntelMPI':
+            warning("Expected Intel MPI distribution with `%s`, but found `%s`"
+                    % (self.__class__.__name__, mpi_distro))
+
+    def __init_intel_mpi_flags__(self, **kwargs):
+        self.cflags.insert(0, '-cc=%s' % self.CC)  # MPI wrapper must use self.CC
 
     def get_version(self):
         if configuration['mpi']:
@@ -792,36 +800,80 @@ def __init_finalize__(self, **kwargs):
         platform = kwargs.pop('platform', configuration['platform'])
         language = kwargs.pop('language', configuration['language'])
 
-        # Earlier versions to OneAPI 2023.2.0 (clang17 underneath), have an OpenMP bug
-        if self.version < Version('17.0.0') and language == 'openmp':
-            self.ldflags.remove('-qopenmp')
-            self.ldflags.append('-fopenmp')
-
         if language == 'sycl':
-            self.cflags.append('-fsycl')
-            if platform is NVIDIAX:
-                self.cflags.append('-fsycl-targets=nvptx64-cuda')
-            else:
-                self.cflags.append('-fsycl-targets=spir64')
+            raise ValueError("Use SyclCompiler to jit-compile sycl")
+
+        elif language == 'openmp':
+            # Versions earlier than OneAPI 2023.2.0 (clang17 underneath) have an
+            # OpenMP bug concerning reductions, hence with them we're forced to
+            # use the obsolete -fopenmp
+            if self.version < Version('17.0.0'):
+                self.ldflags.remove('-qopenmp')
+                self.ldflags.append('-fopenmp')
 
             if platform is NVIDIAX:
                 self.cflags.append('-fopenmp-targets=nvptx64-cuda')
-        if platform in [INTELGPUX, PVC]:
-            self.ldflags.append('-fiopenmp')
-            self.ldflags.append('-fopenmp-targets=spir64')
-            self.ldflags.append('-fopenmp-target-simd')
+            elif isinstance(platform, IntelDevice):
+                self.cflags.append('-fiopenmp')
+                self.cflags.append('-fopenmp-targets=spir64')
+                self.cflags.append('-fopenmp-target-simd')
+
+                self.cflags.remove('-g')  # -g disables some optimizations in IGC
+                self.cflags.append('-gline-tables-only')
+                self.cflags.append('-fdebug-info-for-profiling')
+
+    def __init_intel_mpi__(self, **kwargs):
+        IntelCompiler.__init_intel_mpi__(self, **kwargs)
 
-            self.cflags.remove('-g')  # -g disables some optimizations in IGC
-            self.cflags.append('-gline-tables-only')
-            self.cflags.append('-fdebug-info-for-profiling')
+        platform = kwargs.pop('platform', configuration['platform'])
+
+        # The Intel toolchain requires the I_MPI_OFFLOAD env var to be set
+        # to enable GPU-aware MPI (that is, passing device pointers to MPI calls)
+        if isinstance(platform, IntelDevice):
+            environ['I_MPI_OFFLOAD'] = '1'
+
+    def __init_intel_mpi_flags__(self, **kwargs):
+        pass  # Unlike IntelCompiler's version, no `-cc=<CC>` flag is inserted
+
+    get_version = Compiler.get_version
 
     def __lookup_cmds__(self):
         # OneAPI HPC ToolKit comes with icpx, which is clang++,
         # and icx, which is clang
         self.CC = 'icx'
         self.CXX = 'icpx'
-        self.MPICC = 'mpicc'
-        self.MPICXX = 'mpicxx'
+        self.MPICC = 'mpiicx'
+        self.MPICXX = 'mpiicpx'
+
+
+class SyclCompiler(OneapiCompiler):
+
+    _cpp = True
+
+    def __init_finalize__(self, **kwargs):
+        IntelCompiler.__init_finalize__(self, **kwargs)  # Bypass OneapiCompiler's
+
+        platform = kwargs.pop('platform', configuration['platform'])
+        language = kwargs.pop('language', configuration['language'])
+
+        if language != 'sycl':
+            raise ValueError("Expected language sycl with SyclCompiler")
+
+        self.cflags.remove('-std=c99')  # Compiling as C++ (`_cpp` above)
+        self.cflags.append('-fsycl')
+
+        self.cflags.remove('-g')  # -g disables some optimizations in IGC
+        self.cflags.append('-gline-tables-only')
+        self.cflags.append('-fdebug-info-for-profiling')
+
+        if isinstance(platform, Cpu64):
+            pass  # No -fsycl-targets flag is added for CPU platforms
+        elif platform is NVIDIAX:
+            self.cflags.append('-fsycl-targets=nvptx64-cuda')
+        elif isinstance(platform, IntelDevice):
+            self.cflags.append('-fsycl-targets=spir64')
+        else:
+            raise NotImplementedError("Unsupported platform %s" % platform)
 
 
 class CustomCompiler(Compiler):
@@ -845,7 +897,7 @@ def __new__(cls, *args, **kwargs):
 
         if platform is M1:
             _base = ClangCompiler
-        elif platform is INTELGPUX:
+        elif isinstance(platform, IntelDevice):
             _base = OneapiCompiler
         elif platform is NVIDIAX:
             if language == 'cuda':
@@ -915,6 +967,7 @@ def __new_with__(self, **kwargs):
     'intel': OneapiCompiler,
     'icx': OneapiCompiler,
     'icpx': OneapiCompiler,
+    'sycl': SyclCompiler,
     'icc': IntelCompiler,
     'icpc': IntelCompiler,
     'intel-knl': IntelKNLCompiler,
diff --git a/devito/finite_differences/differentiable.py b/devito/finite_differences/differentiable.py
index 607ab7693b..6cb4d4b45d 100644
--- a/devito/finite_differences/differentiable.py
+++ b/devito/finite_differences/differentiable.py
@@ -682,11 +682,12 @@ def __init_finalize__(self, *args, **kwargs):
         assert isinstance(weights, (list, tuple, np.ndarray))
 
         # Normalize `weights`
-        weights = tuple(sympy.sympify(i) for i in weights)
+        from devito.symbolics import pow_to_mul  # noqa, sigh
+        weights = tuple(pow_to_mul(sympy.sympify(i)) for i in weights)
 
         self._spacings = set().union(*[i.find(Spacing) for i in weights])
 
-        kwargs['scope'] = 'constant'
+        kwargs['scope'] = kwargs.get('scope', 'stack')
         kwargs['initvalue'] = weights
 
         super().__init_finalize__(*args, **kwargs)
@@ -701,7 +702,7 @@ def __eq__(self, other):
     __hash__ = sympy.Basic.__hash__
 
     def _hashable_content(self):
-        return (self.name, self.dimension, str(self.weights))
+        return (self.name, self.dimension, str(self.weights), self.scope)
 
     @property
     def dimension(self):
diff --git a/devito/ir/iet/nodes.py b/devito/ir/iet/nodes.py
index 486e7e4b80..136e7bbfee 100644
--- a/devito/ir/iet/nodes.py
+++ b/devito/ir/iet/nodes.py
@@ -24,7 +24,7 @@
 __all__ = ['Node', 'Block', 'Expression', 'Callable', 'Call', 'ExprStmt',
            'Conditional', 'Iteration', 'List', 'Section', 'TimedList', 'Prodder',
            'MetaCall', 'PointerCast', 'HaloSpot', 'Definition', 'ExpressionBundle',
-           'AugmentedExpression', 'Increment', 'Return', 'While',
+           'AugmentedExpression', 'Increment', 'Return', 'While', 'ListMajor',
            'ParallelIteration', 'ParallelBlock', 'Dereference', 'Lambda',
            'SyncSpot', 'Pragma', 'DummyExpr', 'BlankLine', 'ParallelTree',
            'BusyWait', 'CallableBody', 'Transfer']
@@ -132,12 +132,12 @@ def __repr__(self):
 
     @property
     def functions(self):
-        """All AbstractFunction objects used by this node."""
+        """All AbstractFunctions and AbstractObjects used by this node."""
         return ()
 
     @property
     def expr_symbols(self):
-        """All symbols appearing in an expression within this node."""
+        """All symbols appearing within this node."""
         return ()
 
     @property
@@ -243,12 +243,14 @@ class Call(ExprStmt, Node):
         Explicitly tagging these AbstractFunctions is useful in the case of external
         calls, that is whenever the compiler would be unable to retrieve that
         information by analysis of the IET graph.
+    templates : list of Basic, optional
+        The template arguments of the Call.
     """
 
     is_Call = True
 
     def __init__(self, name, arguments=None, retobj=None, is_indirect=False,
-                 cast=False, writes=None):
+                 cast=False, writes=None, templates=None):
         if isinstance(name, CallFromPointer):
             self.base = name.base
         else:
@@ -259,11 +261,22 @@ def __init__(self, name, arguments=None, retobj=None, is_indirect=False,
         self.is_indirect = is_indirect
         self.cast = cast
         self._writes = as_tuple(writes)
+        self.templates = as_tuple(templates)
 
     def __repr__(self):
         ret = "" if self.retobj is None else "%s = " % self.retobj
         return "%sCall::\n\t%s(...)" % (ret, self.name)
 
+    def _rebuild(self, *args, **kwargs):
+        if args:
+            # Not elegant, but basically it handles the fact that a Call might
+            # have nested Calls/Lambdas among its `arguments`, and these might
+            # change, and we are in such a case *if and only if* we have `args`
+            assert len(args) == len(self.children)
+            mapper = dict(zip(self.children, args))
+            kwargs['arguments'] = [mapper.get(i, i) for i in self.arguments]
+        return super()._rebuild(**kwargs)
+
     @property
     def children(self):
         return tuple(i for i in self.arguments if isinstance(i, (Call, Lambda)))
@@ -326,8 +339,6 @@ def expr_symbols(self):
     @property
     def defines(self):
         ret = ()
-        if self.base is not None:
-            ret += (self.base,)
         if isinstance(self.retobj, Basic):
             ret += (self.retobj,)
         return ret
@@ -743,8 +754,16 @@ class CallableBody(Node):
     init : Node, optional
         A piece of IET to perform some initialization relevant for `body`
         (e.g., to initialize the target language runtime).
+    standalones : list of Definitions, optional
+        Object definitions for `body`. Instantiating these objects does not
+        require passing any arguments to their constructors, so these
+        Definitions can be scheduled safely right after `init`. They may or may
+        not be required by some of the subsequent nodes (e.g., `allocs`,
+        `maps`).
     allocs : list of Nodes, optional
         Data definitions and allocations for `body`.
+    stacks : list of Definitions, optional
+        Definitions for the stack-scoped objects appearing in `body`.
     casts : list of PointerCasts, optional
         Sequence of PointerCasts required by the `body`.
     bundles : list of Nodes, optional
@@ -756,7 +775,9 @@ class CallableBody(Node):
     strides : list of Nodes, optional
         Statements defining symbols used to access linearized arrays.
     objs : list of Definitions, optional
-        Object definitions for `body`.
+        Object definitions for `body`. Instantiating these objects may or may
+        not require some of the symbols defined in the previous nodes (e.g.,
+        `allocs`, `maps`).
     unmaps : Transfer or list of Transfer, optional
         Data unmaps for `body`.
     unbundles : list of Nodes, optional
@@ -767,11 +788,13 @@ class CallableBody(Node):
 
     is_CallableBody = True
 
-    _traversable = ['unpacks', 'init', 'allocs', 'casts', 'bundles', 'maps',
-                    'strides', 'objs', 'body', 'unmaps', 'unbundles', 'frees']
+    _traversable = ['unpacks', 'init', 'standalones', 'allocs', 'stacks',
+                    'casts', 'bundles', 'maps', 'strides', 'objs', 'body',
+                    'unmaps', 'unbundles', 'frees']
 
-    def __init__(self, body, init=(), unpacks=(), strides=(), allocs=(), casts=(),
-                 bundles=(), objs=(), maps=(), unmaps=(), unbundles=(), frees=()):
+    def __init__(self, body, init=(), standalones=(), unpacks=(), strides=(),
+                 allocs=(), stacks=(), casts=(), bundles=(), objs=(), maps=(),
+                 unmaps=(), unbundles=(), frees=()):
         # Sanity check
         assert not isinstance(body, CallableBody), "CallableBody's cannot be nested"
 
@@ -779,7 +802,9 @@ def __init__(self, body, init=(), unpacks=(), strides=(), allocs=(), casts=(),
 
         self.unpacks = as_tuple(unpacks)
         self.init = as_tuple(init)
+        self.standalones = as_tuple(standalones)
         self.allocs = as_tuple(allocs)
+        self.stacks = as_tuple(stacks)
         self.casts = as_tuple(casts)
         self.strides = as_tuple(strides)
         self.bundles = as_tuple(bundles)
@@ -894,7 +919,12 @@ def __repr__(self):
 
     @property
     def functions(self):
-        return (self.function,)
+        ret = [self.function]
+        for i in self.expr_symbols:
+            f = i.function
+            if f.is_AbstractFunction or f.is_AbstractObject:
+                ret.append(i.function)
+        return tuple(ret)
 
     @property
     def defines(self):
@@ -905,16 +935,25 @@ def defines(self):
 
     @property
     def expr_symbols(self):
-        if not self.function.is_Array or self.function.initvalue is None:
-            return ()
-        # These are just a handful of values so it's OK to iterate them over
-        ret = set()
-        for i in self.function.initvalue:
+        f = self.function
+        if f.is_LocalObject:
+            ret = set(flatten(i.free_symbols for i in f.cargs))
             try:
-                ret.update(i.free_symbols)
+                ret.update(f.initvalue.free_symbols)
             except AttributeError:
                 pass
-        return tuple(ret)
+            return tuple(ret)
+        elif f.is_Array and f.initvalue is not None:
+            # These are just a handful of values so it's OK to iterate them over
+            ret = set()
+            for i in f.initvalue:
+                try:
+                    ret.update(i.free_symbols)
+                except AttributeError:
+                    pass
+            return tuple(ret)
+        else:
+            return ()
 
 
 class PointerCast(ExprStmt, Node):
@@ -1044,10 +1083,19 @@ def __init__(self, body, captures=None, parameters=None):
     def __repr__(self):
         return "Lambda[%s](%s)" % (self.captures, self.parameters)
 
+    @property
+    def functions(self):
+        return tuple(i.function for i in self.parameters
+                     if isinstance(i.function, AbstractFunction))
+
     @cached_property
     def expr_symbols(self):
         return tuple(self.parameters)
 
+    @property
+    def defines(self):
+        return tuple(self.parameters)
+
 
 class Section(List):
 
@@ -1310,6 +1358,10 @@ def __init__(self, value=None):
         self.value = value
 
 
+class ListMajor(List):
+    pass  # Marker type: visitors' _blankline_logic spaces it out like a Section
+
+
 def DummyExpr(*args, init=False):
     return Expression(DummyEq(*args), init=init)
 
diff --git a/devito/ir/iet/visitors.py b/devito/ir/iet/visitors.py
index 38145cac3a..f6e266ed9d 100644
--- a/devito/ir/iet/visitors.py
+++ b/devito/ir/iet/visitors.py
@@ -15,9 +15,10 @@
 
 from devito.exceptions import VisitorException
 from devito.ir.iet.nodes import (Node, Iteration, Expression, ExpressionBundle,
-                                 Call, Lambda, BlankLine, Section)
+                                 Call, Lambda, BlankLine, Section, ListMajor)
 from devito.ir.support.space import Backward
-from devito.symbolics import ListInitializer, ccode, uxreplace
+from devito.symbolics import (FieldFromComposite, FieldFromPointer,
+                              ListInitializer, ccode, uxreplace)
 from devito.tools import (GenericVisitor, as_tuple, ctypes_to_cstr, filter_ordered,
                           filter_sorted, flatten, is_external_ctype,
                           c_restrict_void_p, sorted_priority)
@@ -192,10 +193,14 @@ def _gen_struct_decl(self, obj, masked=()):
         Convert ctypes.Struct -> cgen.Structure.
         """
         ctype = obj._C_ctype
-        while issubclass(ctype, ctypes._Pointer):
-            ctype = ctype._type_
+        try:
+            while issubclass(ctype, ctypes._Pointer):
+                ctype = ctype._type_
 
-        if not issubclass(ctype, ctypes.Structure):
+            if not issubclass(ctype, ctypes.Structure):
+                return None
+        except TypeError:
+            # E.g., `ctype` is of type `dtypes_lowering.CustomDtype`
             return None
 
         try:
@@ -223,45 +228,55 @@ def _gen_struct_decl(self, obj, masked=()):
 
         return c.Struct(ctype.__name__, entries)
 
-    def _gen_value(self, obj, level=2, masked=()):
+    def _gen_value(self, obj, mode=1, masked=()):
+        """
+        Convert a devito.types.Basic object into a cgen declaration/definition.
+
+        A Basic object may need to be declared and optionally defined in three
+        different ways, which correspond to the three possible values of `mode`:
+
+            * 0: Simple. E.g., `int a = 1`;
+            * 1: Comprehensive. E.g., `const int *restrict a`, `int a[10]`;
+            * 2: Declaration suitable for a function parameter list.
+        """
         qualifiers = [v for k, v in self._qualifiers_mapper.items()
                       if getattr(obj.function, k, False) and v not in masked]
 
-        if (obj._mem_stack or obj._mem_constant) and level == 2:
+        if (obj._mem_stack or obj._mem_constant) and mode == 1:
             strtype = obj._C_typedata
             strshape = ''.join('[%s]' % ccode(i) for i in obj.symbolic_shape)
         else:
             strtype = ctypes_to_cstr(obj._C_ctype)
             strshape = ''
-            if isinstance(obj, (AbstractFunction, IndexedData)) and level >= 1:
+            if isinstance(obj, (AbstractFunction, IndexedData)) and mode >= 1:
                 if not obj._mem_stack:
                     strtype = '%s%s' % (strtype, self._restrict_keyword)
         strtype = ' '.join(qualifiers + [strtype])
 
+        if obj.is_LocalObject and obj._C_modifier is not None and mode == 2:
+            strtype += obj._C_modifier
+
         strname = obj._C_name
         strobj = '%s%s' % (strname, strshape)
 
-        try:
-            if obj.cargs:
-                strobj = MultilineCall(strobj, obj.cargs, True)
-        except AttributeError:
-            pass
+        if obj.is_LocalObject and obj.cargs and mode == 1:
+            arguments = [ccode(i) for i in obj.cargs]
+            strobj = MultilineCall(strobj, arguments, True)
 
         value = c.Value(strtype, strobj)
 
         try:
-            if obj.is_AbstractFunction and obj._data_alignment and level == 2:
+            if obj.is_AbstractFunction and obj._data_alignment and mode == 1:
                 value = c.AlignedAttribute(obj._data_alignment, value)
         except AttributeError:
             pass
 
-        try:
-            if obj.initvalue is not None and level == 2:
-                init = ListInitializer(obj.initvalue)
-                if not obj._mem_constant or init.is_numeric:
-                    value = c.Initializer(value, ccode(init))
-        except AttributeError:
-            pass
+        if obj.is_Array and obj.initvalue is not None and mode == 1:
+            init = ListInitializer(obj.initvalue)
+            if not obj._mem_constant or init.is_numeric:
+                value = c.Initializer(value, ccode(init))
+        elif obj.is_LocalObject and obj.initvalue is not None and mode == 1:
+            value = c.Initializer(value, ccode(obj.initvalue))
 
         return value
 
@@ -269,12 +284,17 @@ def _gen_rettype(self, obj):
         try:
             return self._gen_value(obj, 0).typename
         except AttributeError:
-            assert isinstance(obj, str)
+            pass
+        if isinstance(obj, str):
             return obj
+        elif isinstance(obj, (FieldFromComposite, FieldFromPointer)):
+            return self._gen_value(obj.function.base, 0).typename
+        else:
+            return None
 
     def _args_decl(self, args):
         """Generate cgen declarations from an iterable of symbols and expressions."""
-        return [self._gen_value(i, 1) for i in args]
+        return [self._gen_value(i, 2) for i in args]
 
     def _args_call(self, args):
         """
@@ -306,7 +326,8 @@ def _blankline_logic(self, children):
         """
         Generate cgen blank lines in between logical units.
         """
-        candidates = (Expression, ExpressionBundle, Iteration, Section)
+        candidates = (Expression, ExpressionBundle, Iteration, Section,
+                      ListMajor)
 
         processed = []
         for child in children:
@@ -327,8 +348,8 @@ def _blankline_logic(self, children):
                       all(i.dim.is_Stencil for i in g)):
                     rebuilt.extend(g)
                 elif (prev in candidates and k in candidates) or \
-                     (prev is not None and k is Section) or \
-                     (prev is Section):
+                     (prev is not None and k in (ListMajor, Section)) or \
+                     (prev in (ListMajor, Section)):
                     rebuilt.append(BlankLine)
                     rebuilt.extend(g)
                 else:
@@ -467,16 +488,19 @@ def visit_AugmentedExpression(self, o):
 
     def visit_Call(self, o, nested_call=False):
         retobj = o.retobj
-        cast = o.cast and self._gen_rettype(retobj)
+        rettype = self._gen_rettype(retobj)
+        cast = o.cast and rettype
         arguments = self._args_call(o.arguments)
         if retobj is None:
-            return MultilineCall(o.name, arguments, nested_call, o.is_indirect, cast)
+            return MultilineCall(o.name, arguments, nested_call, o.is_indirect,
+                                 cast, o.templates)
         else:
-            call = MultilineCall(o.name, arguments, True, o.is_indirect, cast)
-            if retobj.is_Indexed:
+            call = MultilineCall(o.name, arguments, True, o.is_indirect, cast,
+                                 o.templates)
+            if retobj.is_Indexed or \
+               isinstance(retobj, (FieldFromComposite, FieldFromPointer)):
                 return c.Assign(ccode(retobj), call)
             else:
-                rettype = self._gen_rettype(retobj)
                 return c.Initializer(c.Value(rettype, retobj._C_name), call)
 
     def visit_Conditional(self, o):
@@ -562,7 +586,13 @@ def visit_CallableBody(self, o):
         return c.Collection(body)
 
     def visit_Lambda(self, o):
-        body = flatten(self._visit(i) for i in o.children)
+        body = []
+        for i in o.children:
+            v = self._visit(i)
+            if v:
+                if body:
+                    body.append(c.Line())
+                body.extend(as_tuple(v))
         captures = [str(i) for i in o.captures]
         decls = [i.inline() for i in self._args_decl(o.parameters)]
         top = c.Line('[%s](%s)' % (', '.join(captures), ', '.join(decls)))
@@ -605,7 +635,8 @@ def _operator_typedecls(self, o, mode='all'):
 
         # This is essentially to rule out vector types which are declared already
         # in some external headers
-        xfilter = lambda i: xfilter1(i) and not is_external_ctype(i._C_ctype, o._includes)
+        xfilter = lambda i: (xfilter1(i) and
+                             not is_external_ctype(i._C_ctype, o._includes))
 
         candidates = o.parameters + tuple(o._dspace.parts)
         typedecls = [self._gen_struct_decl(i) for i in candidates if xfilter(i)]
@@ -645,6 +676,11 @@ def visit_Operator(self, o, mode='all'):
         # Header files
         includes = self._operator_includes(o) + [blankline]
 
+        # Namespaces
+        namespaces = [c.Statement('using namespace %s' % i) for i in o._namespaces]
+        if namespaces:
+            namespaces.append(blankline)
+
         # Type declarations
         typedecls = self._operator_typedecls(o, mode)
         if mode in ('all', 'public') and o._compiler.src_ext in ('cpp', 'cu'):
@@ -656,7 +692,7 @@ def visit_Operator(self, o, mode='all'):
         if globs:
             globs.append(blankline)
 
-        return c.Module(headers + includes + typedecls + globs +
+        return c.Module(headers + includes + namespaces + typedecls + globs +
                         esigns + [blankline, kernel] + efuncs)
 
 
@@ -745,6 +781,8 @@ def visit_Conditional(self, o, ret=None, queue=None):
             ret = self._visit(i, ret=ret, queue=queue)
         return ret
 
+    visit_Call = visit_Conditional
+
 
 class MapKind(FindSections):
 
@@ -764,11 +802,21 @@ def visit_dummy(self, o, ret=None, queue=None):
 
     visit_Conditional = FindSections.visit_Iteration
     visit_Block = FindSections.visit_Iteration
+    visit_Lambda = FindSections.visit_Iteration
 
 
 class MapExprStmts(MapKind):
+
     visit_ExprStmt = MapKind.visit_dummy
 
+    def visit_Call(self, o, ret=None, queue=None):
+        if ret is None:
+            ret = self.default_retval()
+        ret[o] = as_tuple(queue)  # Record the Call itself first
+        for i in o.children:  # Then the nested Calls/Lambdas among its arguments
+            ret = self._visit(i, ret=ret, queue=queue)
+        return ret
+
 
 class MapHaloSpots(MapKind):
     visit_HaloSpot = MapKind.visit_dummy
@@ -896,7 +944,7 @@ def _defines_aliases(n):
                                    if isinstance(i, IndexedBase)],
         'writes': lambda n: as_tuple(n.writes),
         'defines': lambda n: as_tuple(n.defines),
-        'globals': lambda n: [f.indexed for f in n.functions if f._mem_constant],
+        'globals': lambda n: [f.base for f in n.functions if f._mem_global],
         'defines-aliases': _defines_aliases
     }
 
@@ -1175,13 +1223,23 @@ def visit_Callable(self, o):
         return o._rebuild(body=body, parameters=parameters)
 
     def visit_Call(self, o):
-        arguments = [uxreplace(i, self.mapper) for i in o.arguments]
+        arguments = []
+        for i in o.arguments:
+            if i in o.children:
+                arguments.append(self._visit(i))
+            else:
+                arguments.append(uxreplace(i, self.mapper))
         if o.retobj is not None:
             retobj = uxreplace(o.retobj, self.mapper)
             return o._rebuild(arguments=arguments, retobj=retobj)
         else:
             return o._rebuild(arguments=arguments)
 
+    def visit_Lambda(self, o):
+        body = self._visit(o.body)
+        parameters = [self.mapper.get(i, i) for i in o.parameters]
+        return o._rebuild(body=body, parameters=parameters)
+
     def visit_Conditional(self, o):
         condition = uxreplace(o.condition, self.mapper)
         then_body = self._visit(o.then_body)
@@ -1246,18 +1304,24 @@ class LambdaCollection(c.Collection):
 
 class MultilineCall(c.Generable):
 
-    def __init__(self, name, arguments, is_expr=False, is_indirect=False, cast=None):
+    def __init__(self, name, arguments, is_expr=False, is_indirect=False,
+                 cast=None, templates=None):
         self.name = name
         self.arguments = as_tuple(arguments)
         self.is_expr = is_expr
         self.is_indirect = is_indirect
         self.cast = cast
+        self.templates = templates
 
     def generate(self):
+        if self.templates:
+            tip = "%s<%s>" % (self.name, ", ".join(str(i) for i in self.templates))
+        else:
+            tip = self.name
         if not self.is_indirect:
-            tip = "%s(" % self.name
+            tip = "%s(" % tip
         else:
-            tip = "%s%s" % (self.name, ',' if self.arguments else '')
+            tip = "%s%s" % (tip, ',' if self.arguments else '')
         processed = []
         for i in self.arguments:
             if isinstance(i, (MultilineCall, LambdaCollection)):
diff --git a/devito/operator/operator.py b/devito/operator/operator.py
index 7aff552a47..4d5208235a 100644
--- a/devito/operator/operator.py
+++ b/devito/operator/operator.py
@@ -136,6 +136,7 @@ class Operator(Callable):
     _default_headers = [('_POSIX_C_SOURCE', '200809L')]
     _default_includes = ['stdlib.h', 'math.h', 'sys/time.h']
     _default_globals = []
+    _default_namespaces = []
 
     def __new__(cls, expressions, **kwargs):
         if expressions is None:
@@ -191,6 +192,8 @@ def _build(cls, expressions, **kwargs):
         op._includes = OrderedSet(*cls._default_includes)
         op._includes.update(profiler._default_includes)
         op._includes.update(byproduct.includes)
+        op._namespaces = OrderedSet(*cls._default_namespaces)
+        op._namespaces.update(byproduct.namespaces)
 
         # Required for the jit-compilation
         op._compiler = kwargs['compiler']
diff --git a/devito/operator/registry.py b/devito/operator/registry.py
index 722b451a3a..04c1000866 100644
--- a/devito/operator/registry.py
+++ b/devito/operator/registry.py
@@ -26,7 +26,7 @@ class OperatorRegistry(OrderedDict, metaclass=Singleton):
     """
 
     _modes = ('noop', 'advanced', 'advanced-fsg')
-    _languages = ('C', 'openmp', 'openacc', 'cuda', 'hip')
+    _languages = ('C', 'openmp', 'openacc', 'cuda', 'hip', 'sycl')
     _accepted = _modes + tuple(product(_modes, _languages))
 
     def add(self, operator, platform, mode, language='C'):
diff --git a/devito/passes/clusters/factorization.py b/devito/passes/clusters/factorization.py
index 41c1ed2ec0..997b6d81fc 100644
--- a/devito/passes/clusters/factorization.py
+++ b/devito/passes/clusters/factorization.py
@@ -5,6 +5,7 @@
 from devito.ir import cluster_pass
 from devito.symbolics import BasicWrapperMixin, estimate_cost, retrieve_symbols
 from devito.tools import ReducerMap
+from devito.types.object import AbstractObject
 
 __all__ = ['factorize']
 
@@ -179,7 +180,9 @@ def _collect_nested(expr):
         return expr, {'funcs': expr}
     elif expr.is_Pow:
         return expr, {'pows': expr}
-    elif expr.is_Symbol or expr.is_Indexed or isinstance(expr, BasicWrapperMixin):
+    elif (expr.is_Symbol or
+          expr.is_Indexed or
+          isinstance(expr, (BasicWrapperMixin, AbstractObject))):
         return expr, {}
     elif expr.is_Add:
         return strategies['default'](expr), {}
diff --git a/devito/passes/iet/definitions.py b/devito/passes/iet/definitions.py
index 903cb5eb35..913432da8e 100644
--- a/devito/passes/iet/definitions.py
+++ b/devito/passes/iet/definitions.py
@@ -24,8 +24,8 @@
 
 class MetaSite(object):
 
-    _items = ('allocs', 'objs', 'frees', 'pallocs', 'pfrees',
-              'maps', 'unmaps', 'efuncs')
+    _items = ('standalones', 'allocs', 'stacks', 'objs', 'frees', 'pallocs',
+              'pfrees', 'maps', 'unmaps', 'efuncs')
 
     def __init__(self):
         for i in self._items:
@@ -90,7 +90,10 @@ def _alloc_object_on_low_lat_mem(self, site, obj, storage):
 
         frees = obj._C_free
 
-        storage.update(obj, site, objs=definition, frees=frees)
+        if obj.free_symbols - {obj}:
+            storage.update(obj, site, objs=definition, frees=frees)
+        else:
+            storage.update(obj, site, standalones=definition, frees=frees)
 
     def _alloc_array_on_low_lat_mem(self, site, obj, storage):
         """
@@ -98,7 +101,7 @@ def _alloc_array_on_low_lat_mem(self, site, obj, storage):
         """
         alloc = Definition(obj)
 
-        storage.update(obj, site, allocs=alloc)
+        storage.update(obj, site, stacks=alloc)
 
     def _alloc_array_on_global_mem(self, site, obj, storage):
         """
@@ -198,13 +201,13 @@ def _alloc_mapped_array_on_high_bw_mem(self, site, obj, storage, *args):
         name = self.sregistry.make_name(prefix='alloc')
         body = (decl, *allocs, init, ret)
         efunc0 = make_callable(name, body, retval=obj)
-        assert len(efunc0.parameters) == 1  # `nbytes_param`
-        alloc = Call(name, nbytes_arg, retobj=obj)
+        args = list(efunc0.parameters)
+        args[args.index(nbytes_param)] = nbytes_arg
+        alloc = Call(name, args, retobj=obj)
 
         name = self.sregistry.make_name(prefix='free')
         efunc1 = make_callable(name, frees)
-        assert len(efunc1.parameters) == 1  # `obj`
-        free = Call(name, obj)
+        free = Call(name, efunc1.parameters)
 
         storage.update(obj, site, allocs=alloc, frees=free, efuncs=(efunc0, efunc1))
 
@@ -271,10 +274,12 @@ def _inject_definitions(self, iet, storage):
             cbody = k.body
 
             # objects
+            standalones = as_list(cbody.standalones) + flatten(v.standalones)
             objs = as_list(cbody.objs) + flatten(v.objs)
 
             # allocs/pallocs
             allocs = as_list(cbody.allocs) + flatten(v.allocs)
+            stacks = as_list(cbody.stacks) + flatten(v.stacks)
             for tid, body in as_mapper(v.pallocs, itemgetter(0), itemgetter(1)).items():
                 header = self.lang.Region._make_header(tid.symbolic_size)
                 init = self.lang['thread-num'](retobj=tid)
@@ -295,8 +300,10 @@ def _inject_definitions(self, iet, storage):
             # efuncs
             efuncs.extend(v.efuncs)
 
-            mapper[cbody] = cbody._rebuild(allocs=allocs, maps=maps, objs=objs,
-                                           unmaps=unmaps, frees=frees)
+            mapper[cbody] = cbody._rebuild(
+                standalones=standalones, allocs=allocs, stacks=stacks,
+                maps=maps, objs=objs, unmaps=unmaps, frees=frees
+            )
 
         processed = Transformer(mapper, nested=True).visit(iet)
 
diff --git a/devito/passes/iet/engine.py b/devito/passes/iet/engine.py
index f74dfec110..c4e98e715c 100644
--- a/devito/passes/iet/engine.py
+++ b/devito/passes/iet/engine.py
@@ -46,6 +46,7 @@ def __init__(self, iet, options=None, sregistry=None, **kwargs):
 
         self.includes = []
         self.headers = []
+        self.namespaces = []
         self.globals = []
 
         # Stash immutable information useful for one or more compiler passes
@@ -86,6 +87,7 @@ def apply(self, func, **kwargs):
 
             self.includes.extend(as_tuple(metadata.get('includes')))
             self.headers.extend(as_tuple(metadata.get('headers')))
+            self.namespaces.extend(as_tuple(metadata.get('namespaces')))
             self.globals.extend(as_tuple(metadata.get('globals')))
 
             # Update jit-compiler if necessary
@@ -119,6 +121,7 @@ def apply(self, func, **kwargs):
         # Uniqueness
         self.includes = filter_ordered(self.includes)
         self.headers = filter_ordered(self.headers, key=str)
+        self.namespaces = filter_ordered(self.namespaces, key=str)
         self.globals = filter_ordered(self.globals)
 
     def visit(self, func, **kwargs):
diff --git a/devito/passes/iet/langbase.py b/devito/passes/iet/langbase.py
index 91e68fc02b..a8df344dbc 100644
--- a/devito/passes/iet/langbase.py
+++ b/devito/passes/iet/langbase.py
@@ -1,4 +1,5 @@
 from functools import singledispatch
+from itertools import takewhile
 from abc import ABC
 
 import cgen as c
@@ -6,12 +7,12 @@
 from devito.data import FULL
 from devito.ir import (DummyExpr, Call, Conditional, Expression, List, Prodder,
                        ParallelIteration, ParallelBlock, PointerCast, EntryFunction,
-                       AsyncCallable, FindNodes, FindSymbols)
+                       AsyncCallable, FindNodes, FindSymbols, IsPerfectIteration)
 from devito.mpi.distributed import MPICommObject
 from devito.passes import is_on_device
 from devito.passes.iet.engine import iet_pass
 from devito.symbolics import Byref, CondNe, SizeOf
-from devito.tools import as_list, prod
+from devito.tools import as_list, is_integer, prod
 from devito.types import Symbol, QueueID, Wildcard
 
 __all__ = ['LangBB', 'LangTransformer']
@@ -160,7 +161,8 @@ def __init__(self, key, sregistry, platform, compiler):
         Parameters
         ----------
         key : callable, optional
-            Return True if an Iteration can and should be parallelized, False otherwise.
+            Return True if an Iteration can and should be parallelized,
+            False otherwise.
         sregistry : SymbolRegistry
             The symbol registry, to access the symbols appearing in an IET.
         platform : Platform
@@ -215,6 +217,169 @@ def Prodder(self):
         return self.lang.Prodder
 
 
+class ShmTransformer(LangTransformer):
+
+    """
+    Abstract base class for LangTransformers that want to emit
+    shared-memory-parallel IETs for CPUs.
+    """
+
+    def __init__(self, key, sregistry, options, platform, compiler):
+        """
+        Parameters
+        ----------
+        key : callable
+            Return True if an Iteration can and should be parallelized,
+            False otherwise.
+        sregistry : SymbolRegistry
+            The symbol registry, to access the symbols appearing in an IET.
+        options : dict
+            The optimization options.
+            Accepted: ['par-collapse-ncores', 'par-collapse-work',
+            'par-chunk-nonaffine', 'par-dynamic-work', 'par-nested']
+            * 'par-collapse-ncores': use a collapse clause if the number of
+              available physical cores is greater than this threshold.
+            * 'par-collapse-work': use a collapse clause if the trip count of the
+              collapsable Iterations is statically known to exceed this threshold.
+            * 'par-chunk-nonaffine': coefficient to adjust the chunk size in
+              non-affine parallel Iterations.
+            * 'par-dynamic-work': use dynamic scheduling if the operation count per
+              iteration exceeds this threshold. Otherwise, use static scheduling.
+            * 'par-nested': use nested parallelism if the number of hyperthreads
+              per core is greater than this threshold.
+        platform : Platform
+            The underlying platform.
+        compiler : Compiler
+            The underlying JIT compiler.
+        """
+        super().__init__(key, sregistry, platform, compiler)
+
+        self.collapse_ncores = options['par-collapse-ncores']
+        self.collapse_work = options['par-collapse-work']
+        self.chunk_nonaffine = options['par-chunk-nonaffine']
+        self.dynamic_work = options['par-dynamic-work']
+        self.nested = options['par-nested']
+
+    @property
+    def ncores(self):
+        return self.platform.cores_physical
+
+    @property
+    def nhyperthreads(self):
+        return self.platform.threads_per_core
+
+    @property
+    def nthreads(self):
+        return self.sregistry.nthreads
+
+    @property
+    def nthreads_nested(self):
+        return self.sregistry.nthreads_nested
+
+    @property
+    def nthreads_nonaffine(self):
+        return self.sregistry.nthreads_nonaffine
+
+    @property
+    def threadid(self):
+        return self.sregistry.threadid
+
+    def _score_candidate(self, n0, root, collapsable=()):
+        """
+        The score of a collapsable nest depends on the number of fully-parallel
+        Iterations and their position in the nest (the outer, the better).
+        """
+        nest = [root] + list(collapsable)
+        n = len(nest)
+
+        # Number of fully-parallel collapsable Iterations
+        key = lambda i: i.is_ParallelNoAtomic
+        fp_iters = list(takewhile(key, nest))
+        n_fp_iters = len(fp_iters)
+
+        # Number of parallel-if-atomic collapsable Iterations
+        key = lambda i: i.is_ParallelAtomic
+        pia_iters = list(takewhile(key, nest))
+        n_pia_iters = len(pia_iters)
+
+        # Prioritize the Dimensions that are more likely to define larger
+        # iteration spaces
+        key = lambda d: (not d.is_Derived or
+                         (d.is_Custom and not is_integer(d.symbolic_size)) or
+                         (d.is_Block and d._depth == 1))
+
+        fpdims = [i.dim for i in fp_iters]
+        n_fp_iters_large = len([d for d in fpdims if key(d)])
+
+        piadims = [i.dim for i in pia_iters]
+        n_pia_iters_large = len([d for d in piadims if key(d)])
+
+        return (
+            int(n_fp_iters == n),  # Fully-parallel nest
+            n_fp_iters_large,
+            n_fp_iters,
+            n_pia_iters_large,
+            n_pia_iters,
+            -(n0 + 1),  # The outer, the better
+            n,
+        )
+
+    def _select_candidates(self, candidates):
+        assert candidates
+
+        if self.ncores < self.collapse_ncores:
+            return candidates[0], []
+
+        mapper = {}
+        for n0, root in enumerate(candidates):
+
+            # Score `root` in isolation
+            mapper[(root, ())] = self._score_candidate(n0, root)
+
+            collapsable = []
+            for n, i in enumerate(candidates[n0+1:], n0+1):
+                # The Iteration nest [root, ..., i] must be perfect
+                if not IsPerfectIteration(depth=i).visit(root):
+                    break
+
+                # Loops are collapsable only if none of the iteration variables
+                # appear in initializer expressions. For example, the following
+                # two loops cannot be collapsed
+                #
+                # for (i = ... )
+                #   for (j = i ...)
+                #     ...
+                #
+                # Here, we make sure this won't happen
+                if any(j.dim in i.symbolic_min.free_symbols for j in candidates[n0:n]):
+                    break
+
+                # Can't collapse SIMD-vectorized Iterations
+                if i.is_Vectorized:
+                    break
+
+                # Would there be enough work per parallel iteration?
+                nested = candidates[n+1:]
+                if nested:
+                    try:
+                        work = prod([int(j.dim.symbolic_size) for j in nested])
+                        if work < self.collapse_work:
+                            break
+                    except TypeError:
+                        pass
+
+                collapsable.append(i)
+
+                # Score `root + collapsable`
+                v = tuple(collapsable)
+                mapper[(root, v)] = self._score_candidate(n0, root, v)
+
+        # Retrieve the candidates with highest score
+        root, collapsable = max(mapper, key=mapper.get)
+
+        return root, list(collapsable)
+
+
 class DeviceAwareMixin(object):
 
     @property
diff --git a/devito/passes/iet/orchestration.py b/devito/passes/iet/orchestration.py
index 39fd286f1a..49bc3563d7 100644
--- a/devito/passes/iet/orchestration.py
+++ b/devito/passes/iet/orchestration.py
@@ -195,9 +195,15 @@ def fetchupdate(layer, iet, sync_ops, lang, sregistry):
 
 @fetchupdate.register(HostLayer)
 def _(layer, iet, sync_ops, lang, sregistry):
+    try:
+        qid = sregistry.queue0
+    except AttributeError:
+        qid = None
+
     body = list(iet.body)
     try:
-        body.extend([lang._map_update_device(s.target, s.imask) for s in sync_ops])
+        body.extend([lang._map_update_device(s.target, s.imask, qid=qid)
+                     for s in sync_ops])
         name = 'init_from_%s' % layer.suffix
     except NotImplementedError:
         name = 'init_to_%s' % layer.suffix
diff --git a/devito/passes/iet/parpragma.py b/devito/passes/iet/parpragma.py
index ef1cd38af2..b41b871b55 100644
--- a/devito/passes/iet/parpragma.py
+++ b/devito/passes/iet/parpragma.py
@@ -1,5 +1,3 @@
-from itertools import takewhile
-
 import numpy as np
 import cgen as c
 from cached_property import cached_property
@@ -13,7 +11,7 @@
                        retrieve_iteration_tree, IMask, VECTORIZED)
 from devito.passes.iet.engine import iet_pass
 from devito.passes.iet.langbase import (LangBB, LangTransformer, DeviceAwareMixin,
-                                        make_sections_from_imask)
+                                        ShmTransformer, make_sections_from_imask)
 from devito.symbolics import INT, ccode
 from devito.tools import as_tuple, flatten, is_integer, prod
 from devito.types import Symbol
@@ -204,164 +202,16 @@ def collapsed(self):
         return tuple(ret)
 
 
-class PragmaShmTransformer(PragmaSimdTransformer):
+class PragmaShmTransformer(ShmTransformer, PragmaSimdTransformer):
 
     """
-    Abstract base class for PragmaTransformers capable of emitting SIMD-parallel
-    and shared-memory-parallel IETs.
+    PragmaTransformer capable of emitting SIMD-parallel and shared-memory-parallel
+    IETs for CPUs.
     """
 
     def __init__(self, sregistry, options, platform, compiler):
-        """
-        Parameters
-        ----------
-        sregistry : SymbolRegistry
-            The symbol registry, to access the symbols appearing in an IET.
-        options : dict
-             The optimization options. Accepted: ['par-collapse-ncores',
-             'par-collapse-work', 'par-chunk-nonaffine', 'par-dynamic-work', 'par-nested']
-             * 'par-collapse-ncores': use a collapse clause if the number of
-               available physical cores is greater than this threshold.
-             * 'par-collapse-work': use a collapse clause if the trip count of the
-               collapsable Iterations is statically known to exceed this threshold.
-             * 'par-chunk-nonaffine': coefficient to adjust the chunk size in
-               non-affine parallel Iterations.
-             * 'par-dynamic-work': use dynamic scheduling if the operation count per
-               iteration exceeds this threshold. Otherwise, use static scheduling.
-             * 'par-nested': nested parallelism if the number of hyperthreads per core
-               is greater than this threshold.
-        platform : Platform
-            The underlying platform.
-        compiler : Compiler
-            The underlying JIT compiler.
-        """
         key = lambda i: i.is_ParallelRelaxed and not i.is_Vectorized
-        super().__init__(key, sregistry, platform, compiler)
-
-        self.collapse_ncores = options['par-collapse-ncores']
-        self.collapse_work = options['par-collapse-work']
-        self.chunk_nonaffine = options['par-chunk-nonaffine']
-        self.dynamic_work = options['par-dynamic-work']
-        self.nested = options['par-nested']
-
-    @property
-    def ncores(self):
-        return self.platform.cores_physical
-
-    @property
-    def nhyperthreads(self):
-        return self.platform.threads_per_core
-
-    @property
-    def nthreads(self):
-        return self.sregistry.nthreads
-
-    @property
-    def nthreads_nested(self):
-        return self.sregistry.nthreads_nested
-
-    @property
-    def nthreads_nonaffine(self):
-        return self.sregistry.nthreads_nonaffine
-
-    @property
-    def threadid(self):
-        return self.sregistry.threadid
-
-    def _score_candidate(self, n0, root, collapsable=()):
-        """
-        The score of a collapsable nest depends on the number of fully-parallel
-        Iterations and their position in the nest (the outer, the better).
-        """
-        nest = [root] + list(collapsable)
-        n = len(nest)
-
-        # Number of fully-parallel collapsable Iterations
-        key = lambda i: i.is_ParallelNoAtomic
-        fp_iters = list(takewhile(key, nest))
-        n_fp_iters = len(fp_iters)
-
-        # Number of parallel-if-atomic collapsable Iterations
-        key = lambda i: i.is_ParallelAtomic
-        pia_iters = list(takewhile(key, nest))
-        n_pia_iters = len(pia_iters)
-
-        # Prioritize the Dimensions that are more likely to define larger
-        # iteration spaces
-        key = lambda d: (not d.is_Derived or
-                         (d.is_Custom and not is_integer(d.symbolic_size)) or
-                         (d.is_Block and d._depth == 1))
-
-        fpdims = [i.dim for i in fp_iters]
-        n_fp_iters_large = len([d for d in fpdims if key(d)])
-
-        piadims = [i.dim for i in pia_iters]
-        n_pia_iters_large = len([d for d in piadims if key(d)])
-
-        return (
-            int(n_fp_iters == n),  # Fully-parallel nest
-            n_fp_iters_large,
-            n_fp_iters,
-            n_pia_iters_large,
-            n_pia_iters,
-            -(n0 + 1),  # The outer, the better
-            n,
-        )
-
-    def _select_candidates(self, candidates):
-        assert candidates
-
-        if self.ncores < self.collapse_ncores:
-            return candidates[0], []
-
-        mapper = {}
-        for n0, root in enumerate(candidates):
-
-            # Score `root` in isolation
-            mapper[(root, ())] = self._score_candidate(n0, root)
-
-            collapsable = []
-            for n, i in enumerate(candidates[n0+1:], n0+1):
-                # The Iteration nest [root, ..., i] must be perfect
-                if not IsPerfectIteration(depth=i).visit(root):
-                    break
-
-                # Loops are collapsable only if none of the iteration variables
-                # appear in initializer expressions. For example, the following
-                # two loops cannot be collapsed
-                #
-                # for (i = ... )
-                #   for (j = i ...)
-                #     ...
-                #
-                # Here, we make sure this won't happen
-                if any(j.dim in i.symbolic_min.free_symbols for j in candidates[n0:n]):
-                    break
-
-                # Can't collapse SIMD-vectorized Iterations
-                if i.is_Vectorized:
-                    break
-
-                # Would there be enough work per parallel iteration?
-                nested = candidates[n+1:]
-                if nested:
-                    try:
-                        work = prod([int(j.dim.symbolic_size) for j in nested])
-                        if work < self.collapse_work:
-                            break
-                    except TypeError:
-                        pass
-
-                collapsable.append(i)
-
-                # Score `root + collapsable`
-                v = tuple(collapsable)
-                mapper[(root, v)] = self._score_candidate(n0, root, v)
-
-        # Retrieve the candidates with highest score
-        root, collapsable = max(mapper, key=mapper.get)
-
-        return root, list(collapsable)
+        super().__init__(key, sregistry, options, platform, compiler)
 
     def _make_reductions(self, partree):
         if not any(i.is_ParallelAtomic for i in partree.collapsed):
diff --git a/devito/symbolics/extended_sympy.py b/devito/symbolics/extended_sympy.py
index 8626ecf094..b8de8bade8 100644
--- a/devito/symbolics/extended_sympy.py
+++ b/devito/symbolics/extended_sympy.py
@@ -16,9 +16,10 @@
 __all__ = ['CondEq', 'CondNe', 'IntDiv', 'CallFromPointer',  # noqa
            'CallFromComposite', 'FieldFromPointer', 'FieldFromComposite',
            'ListInitializer', 'Byref', 'IndexedPointer', 'Cast', 'DefFunction',
-           'InlineIf', 'Keyword', 'String', 'Macro', 'MacroArgument',
-           'CustomType', 'Deref', 'INT', 'FLOAT', 'DOUBLE', 'VOID', 'Null',
-           'SizeOf', 'rfunc', 'cast_mapper', 'BasicWrapperMixin']
+           'InlineIf', 'ReservedWord', 'Keyword', 'String', 'Macro', 'Class',
+           'MacroArgument', 'CustomType', 'Deref', 'Namespace', 'Rvalue',
+           'INT', 'FLOAT', 'DOUBLE', 'VOID', 'Null', 'SizeOf', 'rfunc',
+           'cast_mapper', 'BasicWrapperMixin']
 
 
 class CondEq(sympy.Eq):
@@ -88,8 +89,7 @@ def __new__(cls, lhs, rhs, params=None):
             # Perhaps it's a symbolic RHS -- but we wanna be sure it's of type int
             if not hasattr(rhs, 'dtype'):
                 raise ValueError("Symbolic RHS `%s` lacks dtype" % rhs)
-            if not issubclass(rhs.dtype, np.integer) or \
-                    not (rhs.is_Constant and issubclass(rhs.dtype, np.integer)):
+            if not issubclass(rhs.dtype, np.integer):
                 raise ValueError("Symbolic RHS `%s` must be of type `int`, found "
                                  "`%s` instead" % (rhs, rhs.dtype))
         rhs = sympify(rhs)
@@ -517,6 +517,14 @@ class Macro(ReservedWord):
     pass
 
 
+class Class(ReservedWord):
+
+    def __str__(self):
+        return "class %s" % self.value
+
+    __repr__ = __str__
+
+
 class MacroArgument(sympy.Symbol):
 
     def __str__(self):
@@ -534,8 +542,12 @@ class DefFunction(Function, Pickable):
     """
 
     __rargs__ = ('name', 'arguments')
+    __rkwargs__ = ('template',)
+
+    def __new__(cls, name, arguments=None, template=None, **kwargs):
+        if isinstance(name, str):
+            name = Keyword(name)
 
-    def __new__(cls, name, arguments=None, **kwargs):
         _arguments = []
         for i in as_tuple(arguments):
             if isinstance(i, str):
@@ -545,12 +557,25 @@ def __new__(cls, name, arguments=None, **kwargs):
                 _arguments.append(ReservedWord(i))
             else:
                 _arguments.append(i)
-        arguments = tuple(_arguments)
-        if isinstance(name, str):
-            name = Keyword(name)
-        obj = Function.__new__(cls, name, Tuple(*arguments))
+
+        _template = []
+        for i in as_tuple(template):
+            if isinstance(i, str):
+                # Same story as above
+                _template.append(ReservedWord(i))
+            else:
+                _template.append(i)
+
+        args = [name]
+        args.append(Tuple(*_arguments))
+        if _template:
+            args.append(Tuple(*_template))
+
+        obj = Function.__new__(cls, *args)
         obj._name = name
-        obj._arguments = arguments
+        obj._arguments = tuple(_arguments)
+        obj._template = tuple(_template)
+
         return obj
 
     @property
@@ -561,8 +586,17 @@ def name(self):
     def arguments(self):
         return self._arguments
 
+    @property
+    def template(self):
+        return self._template
+
     def __str__(self):
-        return "%s(%s)" % (self.name, ', '.join(str(i) for i in self.arguments))
+        if self.template:
+            template = '<%s>' % ','.join(str(i) for i in self.template)
+        else:
+            template = ''
+        arguments = ', '.join(str(i) for i in self.arguments)
+        return "%s%s(%s)" % (self.name, template, arguments)
 
     __repr__ = __str__
 
@@ -617,6 +651,90 @@ def __str__(self):
     __reduce_ex__ = Pickable.__reduce_ex__
 
 
+class Namespace(sympy.Expr, Pickable):
+
+    """
+    Symbolic representation of a C++ namespace `ns0::ns1::...`.
+    """
+
+    __rargs__ = ('items',)
+
+    def __new__(cls, items, **kwargs):
+        normalized_items = []
+        for i in as_tuple(items):
+            if isinstance(i, str):
+                normalized_items.append(ReservedWord(i))
+            elif isinstance(i, ReservedWord):
+                normalized_items.append(i)
+            else:
+                raise ValueError("`items` must be iterable of str or ReservedWord")
+
+        obj = sympy.Expr.__new__(cls)
+        obj._items = tuple(normalized_items)  # not `items`: may be a bare str
+
+        return obj
+
+    def _hashable_content(self):
+        return super()._hashable_content() + self.items
+
+    @property
+    def items(self):
+        return self._items
+
+    def __str__(self):
+        return "::".join(str(i) for i in self.items)
+
+    __repr__ = __str__
+
+
+class Rvalue(sympy.Expr, Pickable):
+
+    """
+    A generic C++ rvalue, that is a value that occupies a temporary location in
+    memory.
+    """
+
+    __rargs__ = ('expr',)
+    __rkwargs__ = ('namespace', 'init')
+
+    def __new__(cls, expr, namespace=None, init=None):
+        args = [expr]
+        if namespace is not None:
+            args.append(namespace)
+        if init is not None:
+            args.append(init)
+
+        obj = sympy.Expr.__new__(cls, *args)
+
+        obj._expr = expr
+        obj._namespace = namespace
+        obj._init = init
+
+        return obj
+
+    @property
+    def expr(self):
+        return self._expr
+
+    @property
+    def namespace(self):
+        return self._namespace
+
+    @property
+    def init(self):
+        return self._init
+
+    def __str__(self):
+        rvalue = str(self.expr)
+        if self.namespace:
+            rvalue = "%s::%s" % (self.namespace, rvalue)
+        if self.init:
+            rvalue = "%s%s" % (rvalue, self.init)
+        return rvalue
+
+    __repr__ = __str__
+
+
 # *** Casting
 
 class CastStar(object):
diff --git a/devito/symbolics/printer.py b/devito/symbolics/printer.py
index 17c04d8a99..6924ffffbd 100644
--- a/devito/symbolics/printer.py
+++ b/devito/symbolics/printer.py
@@ -235,11 +235,17 @@ def _print_TrigonometricFunction(self, expr):
 
     def _print_DefFunction(self, expr):
         arguments = [self._print(i) for i in expr.arguments]
-        return "%s(%s)" % (expr.name, ','.join(arguments))
+        if expr.template:
+            template = '<%s>' % ','.join([str(i) for i in expr.template])
+        else:
+            template = ''
+        return "%s%s(%s)" % (expr.name, template, ','.join(arguments))
 
     def _print_Fallback(self, expr):
         return expr.__str__()
 
+    _print_Namespace = _print_Fallback
+    _print_Rvalue = _print_Fallback
     _print_MacroArgument = _print_Fallback
     _print_IndexedBase = _print_Fallback
     _print_IndexSum = _print_Fallback
diff --git a/devito/tools/dtypes_lowering.py b/devito/tools/dtypes_lowering.py
index 62776eefd5..70a378dae4 100644
--- a/devito/tools/dtypes_lowering.py
+++ b/devito/tools/dtypes_lowering.py
@@ -7,11 +7,13 @@
 import numpy as np
 from cgen import dtype_to_ctype as cgen_dtype_to_ctype
 
+from .utils import as_tuple
+
 __all__ = ['int2', 'int3', 'int4', 'float2', 'float3', 'float4', 'double2',  # noqa
            'double3', 'double4', 'dtypes_vector_mapper', 'dtype_to_mpidtype',
            'dtype_to_cstr', 'dtype_to_ctype', 'dtype_to_mpitype', 'dtype_len',
            'ctypes_to_cstr', 'c_restrict_void_p', 'ctypes_vector_mapper',
-           'is_external_ctype', 'infer_dtype']
+           'is_external_ctype', 'infer_dtype', 'CustomDtype']
 
 
 # *** Custom np.dtypes
@@ -93,6 +95,34 @@ def get_base_dtype(self, v, default=None):
 dtypes_vector_mapper.update({(v, 1): v for v in mapper.values()})
 
 
+# *** Custom types escaping both the numpy and ctypes namespaces
+
+
+class CustomDtype(object):
+
+    def __init__(self, name, template=None, modifier=None):
+        self.name = name
+        self.template = as_tuple(template)
+        self.modifier = modifier or ''
+
+    def __eq__(self, other):
+        return (isinstance(other, CustomDtype) and
+                self.name == other.name and
+                self.template == other.template and
+                self.modifier == other.modifier)
+
+    def __hash__(self):
+        return hash((self.name, self.template, self.modifier))
+
+    def __repr__(self):
+        template = '<%s>' % ','.join([str(i) for i in self.template])
+        return "%s%s%s" % (self.name,
+                           template if self.template else '',
+                           self.modifier)
+
+    __str__ = __repr__
+
+
 # *** np.dtypes lowering
 
 
@@ -180,6 +210,8 @@ def ctypes_to_cstr(ctype, toarray=None):
     """Translate ctypes types into C strings."""
     if ctype in ctypes_vector_mapper.values():
         retval = ctype.__name__
+    elif isinstance(ctype, CustomDtype):
+        retval = str(ctype)
     elif issubclass(ctype, ctypes.Structure):
         retval = 'struct %s' % ctype.__name__
     elif issubclass(ctype, ctypes.Union):
diff --git a/devito/types/basic.py b/devito/types/basic.py
index 53e8a87189..17835933e3 100644
--- a/devito/types/basic.py
+++ b/devito/types/basic.py
@@ -39,7 +39,8 @@ class CodeSymbol(object):
 
         * "liveness": `_mem_external`, `_mem_internal_eager`, `_mem_internal_lazy`
         * "space": `_mem_local`, `_mem_mapped`, `_mem_host`
-        * "scope": `_mem_stack`, `_mem_heap`, `_mem_constant`, `_mem_shared`
+        * "scope": `_mem_stack`, `_mem_heap`, `_mem_global`, `_mem_shared`,
+                   `_mem_constant`
 
     For example, an object that is `<_mem_internal_lazy, _mem_local, _mem_heap>`
     is allocated within the Operator entry point, on either the host or device
@@ -174,29 +175,36 @@ def _mem_host(self):
     @property
     def _mem_stack(self):
         """
-        True if the associated data should be allocated on the stack, False otherwise.
+        True if the associated data is allocated on the stack, False otherwise.
         """
         return False
 
     @property
     def _mem_heap(self):
         """
-        True if the associated data gets allocated on the heap, False otherwise.
+        True if the associated data is allocated on the heap, False otherwise.
         """
         return False
 
+    @property
+    def _mem_global(self):
+        """
+        True if the symbol is globally scoped, False otherwise.
+        """
+        return self._mem_constant
+
     @property
     def _mem_constant(self):
         """
-        True if the associated data gets allocated in global constant memory,
-        False otherwise.
+        True if the associated data is allocated in global constant memory,
+        False otherwise. This is a special case of `_mem_global`.
         """
         return False
 
     @property
     def _mem_shared(self):
         """
-        True if the associated data gets allocated in so called shared memory,
+        True if the associated data is allocated in so called shared memory,
         False otherwise.
         """
         return False
diff --git a/devito/types/object.py b/devito/types/object.py
index 869e794481..3768dc76fd 100644
--- a/devito/types/object.py
+++ b/devito/types/object.py
@@ -168,26 +168,50 @@ class LocalObject(AbstractObject):
     LocalObjects encode their dtype as a class attribute.
     """
 
+    default_initvalue = None
+    """
+    The initial value may or may not be a class-level attribute. In the latter
+    case, it is passed to the constructor.
+    """
+
     __rargs__ = ('name',)
-    __rkwargs__ = ('cargs', 'liveness')
+    __rkwargs__ = ('cargs', 'initvalue', 'liveness', 'is_global')
 
-    def __init__(self, name, cargs=None, **kwargs):
+    def __init__(self, name, cargs=None, initvalue=None, liveness='lazy',
+                 is_global=False, **kwargs):
         self.name = name
         self.cargs = as_tuple(cargs)
+        self.initvalue = initvalue if initvalue is not None else self.default_initvalue
 
-        self._liveness = kwargs.get('liveness', 'lazy')
-        assert self._liveness in ['eager', 'lazy']
+        assert liveness in ['eager', 'lazy']
+        self._liveness = liveness
+
+        self._is_global = is_global
 
     def _hashable_content(self):
-        return super()._hashable_content() + self.cargs + (self.liveness,)
+        return (super()._hashable_content() +
+                self.cargs +
+                (self.initvalue, self.liveness, self.is_global))
 
     @property
     def liveness(self):
         return self._liveness
 
+    @property
+    def is_global(self):
+        return self._is_global
+
     @property
     def free_symbols(self):
-        return super().free_symbols | set(self.cargs)
+        ret = set()
+        ret.update(super().free_symbols)
+        for i in self.cargs:
+            try:
+                ret.update(i.free_symbols)
+            except AttributeError:
+                # E.g., pure integers
+                pass
+        return ret
 
     @property
     def _C_init(self):
@@ -211,6 +235,13 @@ def _C_free(self):
         """
         return None
 
+    _C_modifier = None
+    """
+    A modifier added to the LocalObject's C declaration when the object appears
+    in a function signature. For example, a subclass might define `_C_modifier = '&'`
+    to impose pass-by-reference semantics.
+    """
+
     @property
     def _mem_internal_eager(self):
         return self._liveness == 'eager'
@@ -218,3 +249,7 @@ def _mem_internal_eager(self):
     @property
     def _mem_internal_lazy(self):
         return self._liveness == 'lazy'
+
+    @property
+    def _mem_global(self):
+        return self._is_global
diff --git a/docker/Dockerfile.amd b/docker/Dockerfile.amd
index bdcc1c5a26..6a104d2872 100644
--- a/docker/Dockerfile.amd
+++ b/docker/Dockerfile.amd
@@ -4,7 +4,6 @@
 ##############################################################
 
 ARG ROCM_VERSION=5.5.1
-ARG arch="aomp"
 
 FROM rocm/dev-ubuntu-22.04:${ROCM_VERSION}-complete as sdk-base
 
diff --git a/docker/Dockerfile.cpu b/docker/Dockerfile.cpu
index 238ab4ff5e..bed0bbad24 100644
--- a/docker/Dockerfile.cpu
+++ b/docker/Dockerfile.cpu
@@ -1,9 +1,9 @@
 # syntax=docker/dockerfile:1
 ##############################################################
-# This Dockerfile contains the Devito codes and can be built using different base images.
+# This Dockerfile builds a base image to run Devito on generic CPU
+# architectures using GCC compilers and OpenMPI.
 ##############################################################
 
-ARG arch=gcc
 ARG OMPI_BRANCH="v4.1.4"
 
 # Base image 
@@ -56,87 +56,3 @@ ENV LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/opt/openmpi/lib
 # Env vars defaults
 ENV DEVITO_ARCH="gcc"
 ENV DEVITO_LANGUAGE="openmp"
-
-##############################################################
-# Intel Oneapi base
-##############################################################
-FROM base as oneapi
-
-# Download the key to system keyring
-# https://www.intel.com/content/www/us/en/develop/documentation/installation-guide-for-intel-oneapi-toolkits-linux/top/installation/install-using-package-managers/apt.html#apt
-RUN wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB | gpg --dearmor > /usr/share/keyrings/oneapi-archive-keyring.gpg
-RUN echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" > /etc/apt/sources.list.d/oneAPI.list
-
-# Intel advisor and drivers
-RUN apt-get update -y && \
-    # advisor
-    apt-get install -y intel-oneapi-advisor
-
-# Drivers mandatory for intel gpu
-# https://dgpu-docs.intel.com/installation-guides/ubuntu/ubuntu-focal.html#ubuntu-20-04-focal
-RUN wget -qO - https://repositories.intel.com/graphics/intel-graphics.key | gpg --dearmor > /usr/share/keyrings/intel-graphics.gpg
-RUN echo "deb [arch=amd64 signed-by=/usr/share/keyrings/intel-graphics.gpg] https://repositories.intel.com/graphics/ubuntu focal main" >  /etc/apt/sources.list.d/intel.list
-
-RUN apt-get update -y && apt-get dist-upgrade -y && \
-    apt-get install -y intel-opencl-icd intel-level-zero-gpu level-zero level-zero-dev \
-                     intel-media-va-driver-non-free libmfx1 libmfxgen1 libvpl2 \
-                     libigc-dev intel-igc-cm libigdfcl-dev libigfxcmrt-dev level-zero-dev
-
-##############################################################
-# ICC image
-# This is a legacy setup that is not built anymore but kept for reference
-##############################################################
-FROM oneapi as icc
-
-RUN apt-get update -y && apt-get install -y intel-oneapi-compiler-dpcpp-cpp-and-cpp-classic intel-oneapi-mpi-devel && \
-    apt-get clean && apt-get autoclean && apt-get autoremove -y && \
-    rm -rf /var/lib/apt/lists/*
-
-# Devito config
-ENV DEVITO_ARCH="icc"
-ENV DEVITO_LANGUAGE="openmp"
-# MPICC compiler for mpi4py
-ENV MPICC=mpiicc
-ENV MPI4PY_FLAGS='. /opt/intel/oneapi/setvars.sh && CFLAGS="-cc=icc"'
-
-##############################################################
-# ICX image
-##############################################################
-FROM oneapi as icx
-
-RUN apt-get update -y && apt-get install -y intel-oneapi-compiler-dpcpp-cpp intel-oneapi-mpi-devel && \
-    apt-get clean && apt-get autoclean && apt-get autoremove -y && \
-    rm -rf /var/lib/apt/lists/*
-
-# Devito config
-ENV DEVITO_ARCH="icx"
-ENV DEVITO_LANGUAGE="openmp"
-# MPICC compiler for mpi4py
-ENV MPICC=mpiicc
-ENV MPI4PY_FLAGS='. /opt/intel/oneapi/setvars.sh && CFLAGS="-cc=icx"'
-
-##############################################################
-# ICX hpc image
-##############################################################
-FROM oneapi as icx-hpc
-
-# Install both icc and icx to avoid missing dependencies
-RUN apt-get update -y && \
-    apt-get install -y intel-oneapi-compiler-dpcpp-cpp intel-oneapi-mpi-devel  && \
-    apt-get install -y intel-oneapi-compiler-dpcpp-cpp-and-cpp-classic
-
-# Missig components
-# https://www.intel.com/content/www/us/en/developer/tools/oneapi/hpc-toolkit-download.html?operatingsystem=linux&distributions=aptpackagemanager
-RUN curl -f "https://registrationcenter-download.intel.com/akdlm/IRC_NAS/ebf5d9aa-17a7-46a4-b5df-ace004227c0e/l_dpcpp-cpp-compiler_p_2023.2.1.8.sh" -O && \
-    chmod +x l_dpcpp-cpp-compiler_p_2023.2.1.8.sh && ./l_dpcpp-cpp-compiler_p_2023.2.1.8.sh -a -s  --eula accept && \
-    rm l_dpcpp-cpp-compiler_p_2023.2.1.8.sh
-
-RUN apt-get clean && apt-get autoclean &&  apt-get autoremove -y && \
-    rm -rf /var/lib/apt/lists/*
-
-# Devito config
-ENV DEVITO_ARCH="icx"
-ENV DEVITO_LANGUAGE="openmp"
-# MPICC compiler for mpi4py
-ENV MPICC=mpiicc
-ENV MPI4PY_FLAGS='. /opt/intel/oneapi/setvars.sh && CFLAGS="-cc=icx"'
\ No newline at end of file
diff --git a/docker/Dockerfile.devito b/docker/Dockerfile.devito
index 703964e45e..aeda36d615 100644
--- a/docker/Dockerfile.devito
+++ b/docker/Dockerfile.devito
@@ -1,5 +1,5 @@
 ##############################################################
-# This Dockerfile contains the Devito codes and can be built using different base images.
+# This Dockerfile contains Devito and can be built using different base images.
 ##############################################################
 
 # Base image with compilers
diff --git a/docker/Dockerfile.intel b/docker/Dockerfile.intel
new file mode 100644
index 0000000000..48757d9776
--- /dev/null
+++ b/docker/Dockerfile.intel
@@ -0,0 +1,114 @@
+# syntax=docker/dockerfile:1
+##############################################################
+# This Dockerfile contains the Intel OneAPI toolkit for Devito
+##############################################################
+
+# Base image
+FROM ubuntu:22.04 as base
+
+ENV DEBIAN_FRONTEND=noninteractive
+
+# Install python
+RUN apt-get update && \
+    apt-get install -y dh-autoreconf python3-venv python3-dev python3-pip
+
+# Install basic tools that the stock Ubuntu image does not provide
+RUN apt-get install -y vim wget git flex libnuma-dev tmux \
+        numactl hwloc curl \
+        autoconf libtool build-essential procps
+
+# Install tmpi (fail on HTTP errors; a plain download is not executable, hence the chmod)
+RUN curl -fsSL https://raw.githubusercontent.com/Azrael3000/tmpi/master/tmpi -o /usr/local/bin/tmpi && chmod +x /usr/local/bin/tmpi
+
+# Install OpenGL library, necessary for the installation of GemPy
+RUN apt-get install -y libgl1-mesa-glx
+
+RUN apt-get clean && apt-get autoclean && apt-get autoremove  -y && \
+    rm -rf /var/lib/apt/lists/*
+
+EXPOSE 8888
+CMD ["/bin/bash"]
+
+##############################################################
+# Intel OneAPI standard image
+##############################################################
+FROM base as oneapi
+
+# Download the key to system keyring
+# https://www.intel.com/content/www/us/en/develop/documentation/installation-guide-for-intel-oneapi-toolkits-linux/top/installation/install-using-package-managers/apt.html#apt
+RUN wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB | gpg --dearmor > /usr/share/keyrings/oneapi-archive-keyring.gpg
+RUN echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" > /etc/apt/sources.list.d/oneAPI.list
+
+# Intel advisor and drivers
+RUN apt-get update -y && \
+    # advisor
+    apt-get install -y intel-oneapi-advisor
+
+# Drivers mandatory for intel gpu
+# https://dgpu-docs.intel.com/driver/installation.html#ubuntu-install-steps
+RUN wget -qO - https://repositories.intel.com/graphics/intel-graphics.key | gpg --dearmor > /usr/share/keyrings/intel-graphics.gpg
+RUN echo "deb [arch=amd64 signed-by=/usr/share/keyrings/intel-graphics.gpg] https://repositories.intel.com/graphics/ubuntu jammy unified" >  /etc/apt/sources.list.d/intel-gpu-jammy.list
+
+RUN apt-get update -y && apt-get dist-upgrade -y && \
+    # Compute and media runtime
+    apt-get install -y intel-opencl-icd intel-level-zero-gpu level-zero \
+            intel-media-va-driver-non-free libmfx1 libmfxgen1 libvpl2 \
+            libegl-mesa0 libegl1-mesa libegl1-mesa-dev libgbm1 libgl1-mesa-dev libgl1-mesa-dri \
+            libglapi-mesa libgles2-mesa-dev libglx-mesa0 libigdgmm12 libxatracker2 mesa-va-drivers \
+            mesa-vdpau-drivers mesa-vulkan-drivers va-driver-all vainfo hwinfo clinfo \
+            # Development packages
+            libigc-dev intel-igc-cm libigdfcl-dev libigfxcmrt-dev level-zero-dev
+
+##############################################################
+# ICC image
+# This is a legacy setup that is not built anymore but kept for reference
+##############################################################
+FROM oneapi as icc
+
+RUN apt-get update -y && apt-get install -y intel-oneapi-compiler-dpcpp-cpp-and-cpp-classic intel-oneapi-mpi-devel && \
+    apt-get clean && apt-get autoclean && apt-get autoremove -y && \
+    rm -rf /var/lib/apt/lists/*
+
+# Devito config
+ENV DEVITO_ARCH="icc"
+ENV DEVITO_LANGUAGE="openmp"
+ENV DEVITO_PLATFORM="intel64"
+# MPICC compiler for mpi4py
+ENV MPICC=mpiicc
+ENV MPI4PY_FLAGS='. /opt/intel/oneapi/setvars.sh && CFLAGS="-cc=icc"'
+
+##############################################################
+# ICX OpenMP image
+##############################################################
+FROM oneapi as icx
+
+RUN apt-get update -y && apt-get install -y intel-oneapi-compiler-dpcpp-cpp intel-oneapi-mpi-devel && \
+    apt-get clean && apt-get autoclean && apt-get autoremove -y && \
+    rm -rf /var/lib/apt/lists/*
+
+# Devito config
+ENV DEVITO_ARCH="icx"
+ENV DEVITO_LANGUAGE="openmp"
+# MPICC compiler for mpi4py
+ENV MPICC=mpiicc
+ENV MPI4PY_FLAGS='. /opt/intel/oneapi/setvars.sh && CFLAGS="-cc=icx"'
+
+##############################################################
+# ICX SYCL CPU image
+##############################################################
+FROM icx as cpu-sycl
+
+# Devito config
+ENV DEVITO_ARCH="sycl"
+ENV DEVITO_LANGUAGE="sycl"
+ENV DEVITO_PLATFORM="intel64"
+
+##############################################################
+# ICX SYCL GPU image
+##############################################################
+FROM icx as gpu-sycl
+
+# Devito config
+ENV DEVITO_ARCH="sycl"
+ENV DEVITO_LANGUAGE="sycl"
+ENV DEVITO_PLATFORM="intelgpuX"
diff --git a/tests/test_iet.py b/tests/test_iet.py
index d148dc1539..6ee6c13ca7 100644
--- a/tests/test_iet.py
+++ b/tests/test_iet.py
@@ -2,18 +2,21 @@
 
 from ctypes import c_void_p
 import cgen
+import numpy as np
 import sympy
 
 from devito import (Eq, Grid, Function, TimeFunction, Operator, Dimension,  # noqa
                     switchconfig)
 from devito.ir.iet import (Call, Callable, Conditional, DummyExpr, Iteration, List,
                            KernelLaunch, Lambda, ElementalFunction, CGen, FindSymbols,
-                           filter_iterations, make_efunc, retrieve_iteration_tree)
+                           filter_iterations, make_efunc, retrieve_iteration_tree,
+                           Transformer)
 from devito.ir import SymbolRegistry
 from devito.passes.iet.engine import Graph
 from devito.passes.iet.languages.C import CDataManager
-from devito.symbolics import Byref, FieldFromComposite, InlineIf, Macro
-from devito.tools import as_tuple
+from devito.symbolics import (Byref, FieldFromComposite, InlineIf, Macro, Class,
+                              FLOAT)
+from devito.tools import CustomDtype, as_tuple, dtype_to_ctype
 from devito.types import Array, LocalObject, Symbol
 
 
@@ -143,7 +146,7 @@ def test_list_denesting():
 
 def test_make_cpp_parfor():
     """
-    Test construction of a CPP parallel for. This excites the IET construction
+    Test construction of a C++ parallel for. This excites the IET construction
     machinery in several ways, in particular by using Lambda nodes (to generate
     C++ lambda functions) and nested Calls.
     """
@@ -273,6 +276,59 @@ def _C_free(self):
 }"""
 
 
+def test_cpp_local_object():
+    """
+    Test C++ support for LocalObjects.
+    """
+
+    class MyObject(LocalObject):
+        dtype = CustomDtype('dummy')
+
+    # Locally-scoped objects are declared in the function body
+    lo0 = MyObject('obj0')
+
+    # Globally-scoped objects must not be declared in the function body
+    lo1 = MyObject('obj1', is_global=True)
+
+    # A LocalObject using both a template and a modifier
+    class SpecialObject(LocalObject):
+        dtype = CustomDtype('bar', template=('int', 'float'), modifier='&')
+
+    lo2 = SpecialObject('obj2')
+
+    # A LocalObject instantiated and subsequently assigned a value
+    lo3 = MyObject('obj3', initvalue=Macro('meh'))
+
+    # A LocalObject instantiated calling its 2-args constructor and subsequently
+    # assigned a value
+    lo4 = MyObject('obj4', cargs=(1, 2), initvalue=Macro('meh'))
+
+    # A LocalObject with generic sympy exprs used as constructor args
+    expr = sympy.Function('ceil')(FLOAT(Symbol(name='s'))**-1)
+    lo5 = MyObject('obj5', cargs=(expr,), initvalue=Macro('meh'))
+
+    # A LocalObject with class-level initvalue and numeric dtype
+    class SpecialObject2(LocalObject):
+        dtype = dtype_to_ctype(np.float32)
+        default_initvalue = Macro('meh')
+
+    lo6 = SpecialObject2('obj6')
+
+    iet = Call('foo', [lo0, lo1, lo2, lo3, lo4, lo5, lo6])
+    iet = ElementalFunction('foo', iet, parameters=())
+
+    dm = CDataManager(sregistry=None)
+    iet = CDataManager.place_definitions.__wrapped__(dm, iet)[0]
+
+    assert 'dummy obj0;' in str(iet)
+    assert 'dummy obj1;' not in str(iet)
+    assert 'bar<int,float>& obj2;' in str(iet)
+    assert 'dummy obj3 = meh;' in str(iet)
+    assert 'dummy obj4(1,2) = meh;' in str(iet)
+    assert 'dummy obj5(ceil(1.0F/(float)s)) = meh;' in str(iet)
+    assert 'float obj6 = meh;' in str(iet)
+
+
 def test_call_indexed():
     grid = Grid(shape=(10, 10))
 
@@ -302,6 +358,28 @@ def test_call_retobj_indexed():
     assert not call.defines
 
 
+def test_call_lambda_transform():
+    grid = Grid(shape=(10, 10))
+    x, y = grid.dimensions
+
+    u = Function(name='u', grid=grid)
+
+    e0 = DummyExpr(x, 1)
+    e1 = DummyExpr(y, 1)
+
+    body = List(body=[e0, e1])
+    call = Call('foo', [u, Lambda(body)])
+
+    subs = {e0: DummyExpr(x, 2), e1: DummyExpr(y, 2)}
+
+    assert str(Transformer(subs).visit(call)) == """\
+foo(u_vec,[]()
+{
+  x = 2;
+  y = 2;
+});"""
+
+
 def test_null_init():
     grid = Grid(shape=(10, 10))
 
@@ -313,7 +391,7 @@ def test_null_init():
     assert expr.defines == (u.indexed,)
 
 
-def test_templates():
+def test_templates_callable():
     grid = Grid(shape=(10, 10))
     x, y = grid.dimensions
 
@@ -330,6 +408,17 @@ def test_templates():
 }"""
 
 
+def test_templates_call():
+    grid = Grid(shape=(10, 10))
+    x, y = grid.dimensions
+
+    u = Function(name='u', grid=grid)
+
+    foo = Call('foo', u, templates=[Class('a'), Class('b')])
+
+    assert str(foo) == "foo<class a, class b>(u_vec);"
+
+
 def test_kernel_launch():
     grid = Grid(shape=(10, 10))
 
diff --git a/tests/test_symbolics.py b/tests/test_symbolics.py
index fcc7395b05..3d3500c98e 100644
--- a/tests/test_symbolics.py
+++ b/tests/test_symbolics.py
@@ -10,7 +10,8 @@
 from devito.ir import Expression, FindNodes
 from devito.symbolics import (retrieve_functions, retrieve_indexed, evalrel,  # noqa
                               CallFromPointer, Cast, DefFunction, FieldFromPointer,
-                              INT, FieldFromComposite, IntDiv, ccode, uxreplace,
+                              INT, FieldFromComposite, IntDiv, Namespace, Rvalue,
+                              ReservedWord, ListInitializer, ccode, uxreplace,
                               retrieve_derivatives)
 from devito.tools import as_tuple
 from devito.types import (Array, Bundle, FIndexed, LocalObject, Object,
@@ -287,6 +288,52 @@ def test_intdiv():
     assert ccode(v) == 'b*((a + b) / 2) + 3'
 
 
+def test_def_function():
+    foo0 = DefFunction('foo', arguments=['a', 'b'], template=['int'])
+    foo1 = DefFunction('foo', arguments=['a', 'b'], template=['int'])
+    foo2 = DefFunction('foo', arguments=['a', 'b'])
+    foo3 = DefFunction('foo', arguments=['a'])
+
+    # Code generation
+    assert str(foo0) == 'foo<int>(a, b)'
+    assert str(foo3) == 'foo(a)'
+
+    # Hashing and equality
+    assert hash(foo0) == hash(foo1)
+    assert foo0 == foo1
+    assert hash(foo0) != hash(foo2)
+    assert hash(foo2) != hash(foo3)
+
+    # Reconstruction
+    assert foo0 == foo0._rebuild()
+    assert str(foo0._rebuild('bar', template=['float'])) == 'bar<float>(a, b)'
+
+
+def test_namespace():
+    ns0 = Namespace(['std', 'algorithms', 'parallel'])
+    assert str(ns0) == 'std::algorithms::parallel'
+
+    ns1 = Namespace(['std'])
+    ns2 = Namespace(['std', 'algorithms', 'parallel'])
+
+    # Test hashing and equality
+    assert hash(ns0) != hash(ns1)  # Different items, different hash
+    assert ns0 != ns1
+    assert hash(ns0) == hash(ns2)
+    assert ns0 == ns2
+
+    # Free symbols
+    assert not ns0.free_symbols
+
+
+def test_rvalue():
+    ctype = ReservedWord('dummytype')
+    ns = Namespace(['my', 'namespace'])
+    init = ListInitializer(())
+
+    assert str(Rvalue(ctype, ns, init)) == 'my::namespace::dummytype{}'
+
+
 def test_cast():
     s = Symbol(name='s', dtype=np.float32)
 
diff --git a/tests/test_unexpansion.py b/tests/test_unexpansion.py
index 8a4dcbbfed..97b855326d 100644
--- a/tests/test_unexpansion.py
+++ b/tests/test_unexpansion.py
@@ -4,6 +4,7 @@
 from conftest import assert_structure, get_params, get_arrays, check_array
 from devito import (Buffer, Eq, Function, TimeFunction, Grid, Operator,
                     Substitutions, Coefficient, cos, sin)
+from devito.finite_differences import Weights
 from devito.arch.compiler import OneapiCompiler
 from devito.ir import Expression, FindNodes, FindSymbols
 from devito.parameters import switchconfig, configuration
@@ -91,7 +92,9 @@ def test_multiple_cross_derivs(self, coeffs, expected):
         op.cfunction
 
         # w0, w1, ...
-        assert len(op._globals) == expected
+        functions = FindSymbols().visit(op)
+        weights = [f for f in functions if isinstance(f, Weights)]
+        assert len(weights) == expected
 
 
 class Test1Pass(object):