Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

compiler: Generalize lowering of reductions #1980

Merged
merged 4 commits into from
Aug 3, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/docker-devito.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,13 +22,13 @@ jobs:
tag: 'nvidia-nvc'
flag: '--gpus all'
test: 'tests/test_gpu_openacc.py tests/test_gpu_common.py'
runner: ["self-hosted", "gpu", "docker"]
runner: ["self-hosted", "gpu", "docker", "v1004"]

- base: 'bases:nvidia-clang'
tag: 'nvidia-clang'
flag: '--gpus all'
test: 'tests/test_gpu_openmp.py tests/test_gpu_common.py'
runner: ["self-hosted", "gpu", "docker"]
runner: ["self-hosted", "gpu", "docker", "kimogila"]

# Runtime gpu flags from https://hub.docker.com/r/rocm/tensorflow/
- base: 'bases:amd'
Expand Down
7 changes: 7 additions & 0 deletions devito/core/cpu.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,12 @@ class Cpu64OperatorMixin(object):
than this threshold.
"""

MAPIFY_REDUCE = False
"""
Vector-expand all scalar reductions to turn them into explicit map-reductions,
which may be easier to parallelize for certain backends.
"""

@classmethod
def _normalize_kwargs(cls, **kwargs):
o = {}
Expand Down Expand Up @@ -119,6 +125,7 @@ def _normalize_kwargs(cls, **kwargs):
# Misc
o['optcomms'] = oo.pop('optcomms', True)
o['linearize'] = oo.pop('linearize', False)
o['mapify-reduce'] = oo.pop('mapify-reduce', cls.MAPIFY_REDUCE)

# Recognised but unused by the CPU backend
oo.pop('par-disabled', None)
Expand Down
7 changes: 7 additions & 0 deletions devito/core/gpu.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,12 @@ class DeviceOperatorMixin(object):
Assuming all functions fit into the gpu memory.
"""

MAPIFY_REDUCE = False
"""
Vector-expand all scalar reductions to turn them into explicit map-reductions,
which may be easier to parallelize for certain backends.
"""

@classmethod
def _normalize_kwargs(cls, **kwargs):
o = {}
Expand Down Expand Up @@ -104,6 +110,7 @@ def _normalize_kwargs(cls, **kwargs):
# Misc
o['optcomms'] = oo.pop('optcomms', True)
o['linearize'] = oo.pop('linearize', False)
o['mapify-reduce'] = oo.pop('mapify-reduce', cls.MAPIFY_REDUCE)

if oo:
raise InvalidOperator("Unsupported optimization options: [%s]"
Expand Down
44 changes: 36 additions & 8 deletions devito/ir/clusters/algorithms.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,13 @@
from devito.symbolics import retrieve_indexed, uxreplace, xreplace_indices
from devito.tools import (DefaultOrderedDict, Stamp, as_mapper, flatten,
is_integer, timed_pass)
from devito.types import Eq, Symbol
from devito.types import Array, Eq, Inc, Symbol
from devito.types.dimension import BOTTOM, ModuloDimension

__all__ = ['clusterize']


def clusterize(exprs, options=None, **kwargs):
def clusterize(exprs, **kwargs):
"""
Turn a sequence of LoweredEqs into a sequence of Clusters.
"""
Expand All @@ -36,7 +36,7 @@ def clusterize(exprs, options=None, **kwargs):
clusters = guard(clusters)

# Determine relevant computational properties (e.g., parallelism)
clusters = analyze(clusters, options)
clusters = analyze(clusters)

# Input normalization (e.g., SSA)
clusters = normalize(clusters, **kwargs)
Expand Down Expand Up @@ -322,10 +322,11 @@ def rule(size, e):


def normalize(clusters, **kwargs):
options = kwargs['options']
sregistry = kwargs['sregistry']

clusters = normalize_nested_indexeds(clusters, sregistry)
clusters = normalize_reductions(clusters, sregistry)
clusters = normalize_reductions(clusters, sregistry, options)

return clusters

Expand Down Expand Up @@ -368,19 +369,46 @@ def pull_indexeds(expr, subs, mapper, parent=None):


@cluster_pass(mode='all')
def normalize_reductions(cluster, sregistry):
def normalize_reductions(cluster, sregistry, options):
"""
Extract the right-hand sides of reduction Eq's into temporaries.
"""
if not any(PARALLEL_IF_ATOMIC in v for v in cluster.properties.values()):
opt_mapify_reduce = options['mapify-reduce']

dims = [d for d, v in cluster.properties.items() if PARALLEL_IF_ATOMIC in v]

if not dims:
return cluster

processed = []
for e in cluster.exprs:
if e.is_Increment and e.lhs.function.is_AbstractFunction:
v = Symbol(name=sregistry.make_name(), dtype=e.dtype)
if e.is_Reduction and e.lhs.is_Indexed and cluster.is_sparse:
# Transform `e` such that we reduce into a scalar (ultimately via
# atomic ops, though this part is carried out by a much later pass).
# For example, given `i = m[p_src]` (i.e., indirection array), turn:
# `u[t, i] += f(u[t, i], src, ...)`
# into
# `s = f(u[t, i], src, ...)`
# `u[t, i] += s`
name = sregistry.make_name()
v = Symbol(name=name, dtype=e.dtype)
processed.extend([e.func(v, e.rhs, operation=None),
e.func(e.lhs, v)])

elif e.is_Reduction and e.lhs.is_Symbol and opt_mapify_reduce:
# Transform `e` into what is in essence an explicit map-reduce.
# For example, turn:
# `s += f(u[x], v[x], ...)`
# into
# `r[x] = f(u[x], v[x], ...)`
# `s += r[x]`
# This makes it much easier to parallelize the map part regardless
# of the target backend
name = sregistry.make_name()
a = Array(name=name, dtype=e.dtype, dimensions=dims)
processed.extend([Eq(a.indexify(), e.rhs),
Inc(e.lhs, a.indexify())])

else:
processed.append(e)

Expand Down
2 changes: 1 addition & 1 deletion devito/ir/clusters/analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@


@timed_pass()
def analyze(clusters, options):
def analyze(clusters):
state = QueueStateful.State()

# Collect properties
Expand Down
5 changes: 4 additions & 1 deletion devito/ir/clusters/cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -204,9 +204,12 @@ def is_dense(self):
# Fallback to legacy is_dense checks
return (not any(e.conditionals for e in self.exprs) and
not any(f.is_SparseFunction for f in self.functions) and
not self.is_scalar and
all(a.is_regular for a in self.scope.accesses))

@property
def is_sparse(self):
return not self.is_dense

@cached_property
def dtype(self):
"""
Expand Down
4 changes: 2 additions & 2 deletions devito/passes/clusters/factorization.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from sympy import Add, Mul, S, collect

from devito.ir import cluster_pass
from devito.symbolics import estimate_cost, retrieve_symbols
from devito.symbolics import BasicWrapperMixin, estimate_cost, retrieve_symbols
from devito.tools import ReducerMap

__all__ = ['factorize']
Expand Down Expand Up @@ -115,7 +115,7 @@ def run(expr):
return expr, {'funcs': expr}
elif expr.is_Pow:
return expr, {'pows': expr}
elif expr.is_Symbol or expr.is_Indexed or expr.is_Atom:
elif expr.is_Symbol or expr.is_Indexed or isinstance(expr, BasicWrapperMixin):
return expr, {}
elif expr.is_Add:
args, candidates = zip(*[run(arg) for arg in expr.args])
Expand Down
5 changes: 3 additions & 2 deletions devito/passes/clusters/utils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from devito.symbolics import uxreplace
from devito.types import Symbol
from devito.types import Symbol, Wildcard

__all__ = ['makeit_ssa']

Expand All @@ -11,7 +11,8 @@ def makeit_ssa(exprs):
# Identify recurring LHSs
seen = {}
for i, e in enumerate(exprs):
seen.setdefault(e.lhs, []).append(i)
if not isinstance(e.lhs, Wildcard):
seen.setdefault(e.lhs, []).append(i)
# Optimization: don't waste time reconstructing stuff if already in SSA form
if all(len(i) == 1 for i in seen.values()):
return exprs
Expand Down
3 changes: 2 additions & 1 deletion devito/symbolics/extended_sympy.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@
'FieldFromComposite', 'ListInitializer', 'Byref', 'IndexedPointer', 'Cast',
'DefFunction', 'InlineIf', 'Keyword', 'String', 'Macro', 'MacroArgument',
'CustomType', 'Deref', 'INT', 'FLOAT', 'DOUBLE', 'VOID', 'CEIL',
'FLOOR', 'MAX', 'MIN', 'Null', 'SizeOf', 'rfunc', 'cast_mapper']
'FLOOR', 'MAX', 'MIN', 'Null', 'SizeOf', 'rfunc', 'cast_mapper',
'BasicWrapperMixin']


class CondEq(sympy.Eq):
Expand Down