Skip to content

Commit

Permalink
Merge pull request #1980 from devitocodes/gen-norm-reductions
Browse files Browse the repository at this point in the history
compiler: Generalize lowering of reductions
  • Loading branch information
mloubout authored Aug 3, 2022
2 parents 774720c + 693485f commit 408d35b
Show file tree
Hide file tree
Showing 9 changed files with 64 additions and 17 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/docker-devito.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,13 +22,13 @@ jobs:
tag: 'nvidia-nvc'
flag: '--gpus all'
test: 'tests/test_gpu_openacc.py tests/test_gpu_common.py'
runner: ["self-hosted", "gpu", "docker"]
runner: ["self-hosted", "gpu", "docker", "v1004"]

- base: 'bases:nvidia-clang'
tag: 'nvidia-clang'
flag: '--gpus all'
test: 'tests/test_gpu_openmp.py tests/test_gpu_common.py'
runner: ["self-hosted", "gpu", "docker"]
runner: ["self-hosted", "gpu", "docker", "kimogila"]

# Runtime gpu flags from https://hub.docker.com/r/rocm/tensorflow/
- base: 'bases:amd'
Expand Down
7 changes: 7 additions & 0 deletions devito/core/cpu.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,12 @@ class Cpu64OperatorMixin(object):
than this threshold.
"""

MAPIFY_REDUCE = False
"""
Vector-expand all scalar reductions to turn them into explicit map-reductions,
which may be easier to parallelize for certain backends.
"""

@classmethod
def _normalize_kwargs(cls, **kwargs):
o = {}
Expand Down Expand Up @@ -119,6 +125,7 @@ def _normalize_kwargs(cls, **kwargs):
# Misc
o['optcomms'] = oo.pop('optcomms', True)
o['linearize'] = oo.pop('linearize', False)
o['mapify-reduce'] = oo.pop('mapify-reduce', cls.MAPIFY_REDUCE)

# Recognised but unused by the CPU backend
oo.pop('par-disabled', None)
Expand Down
7 changes: 7 additions & 0 deletions devito/core/gpu.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,12 @@ class DeviceOperatorMixin(object):
Assuming all functions fit into the gpu memory.
"""

MAPIFY_REDUCE = False
"""
Vector-expand all scalar reductions to turn them into explicit map-reductions,
which may be easier to parallelize for certain backends.
"""

@classmethod
def _normalize_kwargs(cls, **kwargs):
o = {}
Expand Down Expand Up @@ -104,6 +110,7 @@ def _normalize_kwargs(cls, **kwargs):
# Misc
o['optcomms'] = oo.pop('optcomms', True)
o['linearize'] = oo.pop('linearize', False)
o['mapify-reduce'] = oo.pop('mapify-reduce', cls.MAPIFY_REDUCE)

if oo:
raise InvalidOperator("Unsupported optimization options: [%s]"
Expand Down
44 changes: 36 additions & 8 deletions devito/ir/clusters/algorithms.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,13 @@
from devito.symbolics import retrieve_indexed, uxreplace, xreplace_indices
from devito.tools import (DefaultOrderedDict, Stamp, as_mapper, flatten,
is_integer, timed_pass)
from devito.types import Eq, Symbol
from devito.types import Array, Eq, Inc, Symbol
from devito.types.dimension import BOTTOM, ModuloDimension

__all__ = ['clusterize']


def clusterize(exprs, options=None, **kwargs):
def clusterize(exprs, **kwargs):
"""
Turn a sequence of LoweredEqs into a sequence of Clusters.
"""
Expand All @@ -36,7 +36,7 @@ def clusterize(exprs, options=None, **kwargs):
clusters = guard(clusters)

# Determine relevant computational properties (e.g., parallelism)
clusters = analyze(clusters, options)
clusters = analyze(clusters)

# Input normalization (e.g., SSA)
clusters = normalize(clusters, **kwargs)
Expand Down Expand Up @@ -322,10 +322,11 @@ def rule(size, e):


def normalize(clusters, **kwargs):
options = kwargs['options']
sregistry = kwargs['sregistry']

clusters = normalize_nested_indexeds(clusters, sregistry)
clusters = normalize_reductions(clusters, sregistry)
clusters = normalize_reductions(clusters, sregistry, options)

return clusters

Expand Down Expand Up @@ -368,19 +369,46 @@ def pull_indexeds(expr, subs, mapper, parent=None):


@cluster_pass(mode='all')
def normalize_reductions(cluster, sregistry):
def normalize_reductions(cluster, sregistry, options):
"""
Extract the right-hand sides of reduction Eq's into temporaries.
"""
if not any(PARALLEL_IF_ATOMIC in v for v in cluster.properties.values()):
opt_mapify_reduce = options['mapify-reduce']

dims = [d for d, v in cluster.properties.items() if PARALLEL_IF_ATOMIC in v]

if not dims:
return cluster

processed = []
for e in cluster.exprs:
if e.is_Increment and e.lhs.function.is_AbstractFunction:
v = Symbol(name=sregistry.make_name(), dtype=e.dtype)
if e.is_Reduction and e.lhs.is_Indexed and cluster.is_sparse:
# Transform `e` such that we reduce into a scalar (ultimately via
# atomic ops, though this part is carried out by a much later pass).
# For example, given `i = m[p_src]` (i.e., indirection array), turn:
# `u[t, i] += f(u[t, i], src, ...)`
# into
# `s = f(u[t, i], src, ...)`
# `u[t, i] += s`
name = sregistry.make_name()
v = Symbol(name=name, dtype=e.dtype)
processed.extend([e.func(v, e.rhs, operation=None),
e.func(e.lhs, v)])

elif e.is_Reduction and e.lhs.is_Symbol and opt_mapify_reduce:
# Transform `e` into what is in essence an explicit map-reduce.
# For example, turn:
# `s += f(u[x], v[x], ...)`
# into
# `r[x] = f(u[x], v[x], ...)`
# `s += r[x]`
# This makes it much easier to parallelize the map part regardless
# of the target backend
name = sregistry.make_name()
a = Array(name=name, dtype=e.dtype, dimensions=dims)
processed.extend([Eq(a.indexify(), e.rhs),
Inc(e.lhs, a.indexify())])

else:
processed.append(e)

Expand Down
2 changes: 1 addition & 1 deletion devito/ir/clusters/analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@


@timed_pass()
def analyze(clusters, options):
def analyze(clusters):
state = QueueStateful.State()

# Collect properties
Expand Down
5 changes: 4 additions & 1 deletion devito/ir/clusters/cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -204,9 +204,12 @@ def is_dense(self):
# Fallback to legacy is_dense checks
return (not any(e.conditionals for e in self.exprs) and
not any(f.is_SparseFunction for f in self.functions) and
not self.is_scalar and
all(a.is_regular for a in self.scope.accesses))

@property
def is_sparse(self):
return not self.is_dense

@cached_property
def dtype(self):
"""
Expand Down
4 changes: 2 additions & 2 deletions devito/passes/clusters/factorization.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from sympy import Add, Mul, S, collect

from devito.ir import cluster_pass
from devito.symbolics import estimate_cost, retrieve_symbols
from devito.symbolics import BasicWrapperMixin, estimate_cost, retrieve_symbols
from devito.tools import ReducerMap

__all__ = ['factorize']
Expand Down Expand Up @@ -115,7 +115,7 @@ def run(expr):
return expr, {'funcs': expr}
elif expr.is_Pow:
return expr, {'pows': expr}
elif expr.is_Symbol or expr.is_Indexed or expr.is_Atom:
elif expr.is_Symbol or expr.is_Indexed or isinstance(expr, BasicWrapperMixin):
return expr, {}
elif expr.is_Add:
args, candidates = zip(*[run(arg) for arg in expr.args])
Expand Down
5 changes: 3 additions & 2 deletions devito/passes/clusters/utils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from devito.symbolics import uxreplace
from devito.types import Symbol
from devito.types import Symbol, Wildcard

__all__ = ['makeit_ssa']

Expand All @@ -11,7 +11,8 @@ def makeit_ssa(exprs):
# Identify recurring LHSs
seen = {}
for i, e in enumerate(exprs):
seen.setdefault(e.lhs, []).append(i)
if not isinstance(e.lhs, Wildcard):
seen.setdefault(e.lhs, []).append(i)
# Optimization: don't waste time reconstructing stuff if already in SSA form
if all(len(i) == 1 for i in seen.values()):
return exprs
Expand Down
3 changes: 2 additions & 1 deletion devito/symbolics/extended_sympy.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@
'FieldFromComposite', 'ListInitializer', 'Byref', 'IndexedPointer', 'Cast',
'DefFunction', 'InlineIf', 'Keyword', 'String', 'Macro', 'MacroArgument',
'CustomType', 'Deref', 'INT', 'FLOAT', 'DOUBLE', 'VOID', 'CEIL',
'FLOOR', 'MAX', 'MIN', 'Null', 'SizeOf', 'rfunc', 'cast_mapper']
'FLOOR', 'MAX', 'MIN', 'Null', 'SizeOf', 'rfunc', 'cast_mapper',
'BasicWrapperMixin']


class CondEq(sympy.Eq):
Expand Down

0 comments on commit 408d35b

Please sign in to comment.