Skip to content

Commit

Permalink
compiler: prevent reduction clause for perfect-enough outer loops
Browse files Browse the repository at this point in the history
  • Loading branch information
mloubout committed Oct 6, 2023
1 parent e6cd0b0 commit 51be20c
Show file tree
Hide file tree
Showing 4 changed files with 77 additions and 2 deletions.
10 changes: 10 additions & 0 deletions devito/passes/iet/parpragma.py
Original file line number Diff line number Diff line change
Expand Up @@ -318,6 +318,16 @@ def _make_reductions(self, partree):
if not any(i.is_ParallelAtomic for i in partree.collapsed):
return partree

# We bypass the corner case where a reduction might not be optimal, mainly:
# - Only the most inner loop is atomic
# In which case we can parallelize the perfect nest
# The opposite corner case (most outer loop atomic)
# should be detected before this pass
nc = len(partree.collapsed)
if all(i.is_ParallelNoAtomic for i in partree.collapsed[:nc-1]):
mapper = {partree.root: partree.root._rebuild(ncollapsed=nc-1)}
return Transformer(mapper).visit(partree)

exprs = [i for i in FindNodes(Expression).visit(partree) if i.is_reduction]

reductions = []
Expand Down
23 changes: 22 additions & 1 deletion tests/test_dle.py
Original file line number Diff line number Diff line change
Expand Up @@ -863,7 +863,6 @@ def test_incs_no_atomic(self):
op0 = Operator(Inc(uf, 1), opt=('advanced', {'openmp': True,
'par-collapse-ncores': 1,
'par-collapse-work': 0}))

assert 'collapse(3)' in str(op0)
assert 'atomic' in str(op0)

Expand All @@ -875,6 +874,28 @@ def test_incs_no_atomic(self):
assert 'collapse' not in str(op1)
assert 'atomic' not in str(op1)

def test_incr_perfect_outer(self):
    """
    Increment a 2D Function with the product of two Functions carrying
    an extra innermost Dimension `d`. Only the innermost loop is atomic,
    so the generated code is expected to parallelize (and collapse) the
    two outer loops without resorting to a reduction clause.
    """
    grid = Grid((5, 5))
    d = Dimension(name="d")
    u = Function(name="u", dimensions=(*grid.dimensions, d),
                 grid=grid, shape=(*grid.shape, 5))
    v = Function(name="v", dimensions=(*grid.dimensions, d),
                 grid=grid, shape=(*grid.shape, 5))
    u.data.fill(1)
    v.data.fill(2)

    w = Function(name="w", grid=grid)

    summation = Inc(w, u*v)

    # Explicitly request OpenMP and make loop collapsing independent of the
    # host core count, as done in `test_incs_no_atomic`; otherwise the
    # pragma assertions below depend on the ambient configuration
    op0 = Operator([summation], opt=('advanced', {'openmp': True,
                                                  'par-collapse-ncores': 1,
                                                  'par-collapse-work': 0}))
    assert 'reduction' not in str(op0)
    assert 'collapse(2)' in str(op0)
    assert 'omp for' in str(op0)

    op0()
    # Each entry of `w` accumulates 1*2 over the 5 points along `d`
    assert np.all(w.data == 10)

@pytest.mark.parametrize('exprs,simd_level,expected', [
(['Eq(y.symbolic_max, g[0, x], implicit_dims=(t, x))',
'Inc(h1[0, 0], 1, implicit_dims=(t, x, y))'],
Expand Down
24 changes: 23 additions & 1 deletion tests/test_gpu_openacc.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import numpy as np

from devito import (Grid, Function, TimeFunction, SparseTimeFunction, Eq, Operator,
norm, solve)
norm, solve, Dimension, Inc)
from conftest import skipif, assert_blocking, opts_device_tiling
from devito.data import LEFT
from devito.exceptions import InvalidOperator
Expand Down Expand Up @@ -168,6 +168,28 @@ def test_multi_tile_blocking_structure(self):
assert len(iters) == len(v)
assert all(i.step == j for i, j in zip(iters, v))

def test_incr_perfect_outer(self):
    """
    Accumulate `u*v` into `w` along an extra innermost Dimension and check
    that the two outer loops are offloaded as a collapsed OpenACC parallel
    loop, with no reduction clause being generated.
    """
    grid = Grid((5, 5))
    d = Dimension(name="d")

    # `u` and `v` share the grid Dimensions plus the additional `d`
    dimensions = (*grid.dimensions, d)
    shape = (*grid.shape, 5)
    u = Function(name="u", dimensions=dimensions, grid=grid, shape=shape)
    v = Function(name="v", dimensions=dimensions, grid=grid, shape=shape)
    w = Function(name="w", grid=grid)

    u.data.fill(1)
    v.data.fill(2)

    op0 = Operator([Inc(w, u*v)])

    for pattern, expected in [('reduction', False),
                              ('collapse(2)', True),
                              ('acc parallel loop', True)]:
        assert (pattern in str(op0)) is expected

    op0()
    # 1*2 summed over the 5 entries along `d`
    assert np.all(w.data == 10)


class TestOperator(object):

Expand Down
22 changes: 22 additions & 0 deletions tests/test_gpu_openmp.py
Original file line number Diff line number Diff line change
Expand Up @@ -268,6 +268,28 @@ def test_timeparallel_reduction(self):
('omp target teams distribute parallel for collapse(3)'
' reduction(+:f[0])')

def test_incr_perfect_outer(self):
    """
    Accumulate `u*v` into `w` along an extra innermost Dimension and check
    that the two outer loops are offloaded as a collapsed OpenMP target
    region, with no reduction clause being generated.
    """
    grid = Grid((5, 5))
    d = Dimension(name="d")

    # `u` and `v` share the grid Dimensions plus the additional `d`
    dimensions = (*grid.dimensions, d)
    shape = (*grid.shape, 5)
    u = Function(name="u", dimensions=dimensions, grid=grid, shape=shape)
    v = Function(name="v", dimensions=dimensions, grid=grid, shape=shape)
    w = Function(name="w", grid=grid)

    u.data.fill(1)
    v.data.fill(2)

    op0 = Operator([Inc(w, u*v)])

    for pattern, expected in [('reduction', False),
                              ('collapse(2)', True),
                              ('omp target teams distribute parallel', True)]:
        assert (pattern in str(op0)) is expected

    op0()
    # 1*2 summed over the 5 entries along `d`
    assert np.all(w.data == 10)


class TestOperator(object):

Expand Down

0 comments on commit 51be20c

Please sign in to comment.