Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merge input cubes only once when computing lazy multimodel statistics #2518

Merged
merged 4 commits into from
Oct 14, 2024
Merged
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 24 additions & 23 deletions esmvalcore/preprocessor/_multimodel.py
Original file line number Diff line number Diff line change
Expand Up @@ -480,7 +480,8 @@ def _compute_eager(
input_slices = cubes # scalar cubes
else:
input_slices = [cube[chunk] for cube in cubes]
result_slice = _compute(input_slices, operator=operator, **kwargs)
combined_cube = _combine(input_slices)
result_slice = _compute(combined_cube, operator=operator, **kwargs)
result_slices.append(result_slice)

try:
Expand All @@ -498,10 +499,13 @@ def _compute_eager(
return result_cube


def _compute(cubes: list, *, operator: iris.analysis.Aggregator, **kwargs):
def _compute(
cube: iris.cube.Cube,
*,
operator: iris.analysis.Aggregator,
**kwargs,
):
"""Compute statistic."""
cube = _combine(cubes)

with warnings.catch_warnings():
warnings.filterwarnings(
'ignore',
Expand All @@ -526,8 +530,6 @@ def _compute(cubes: list, *, operator: iris.analysis.Aggregator, **kwargs):

# Remove concatenation dimension added by _combine
result_cube.remove_coord(CONCAT_DIM)
for cube in cubes:
cube.remove_coord(CONCAT_DIM)

# some iris aggregators modify dtype, see e.g.
# https://numpy.org/doc/stable/reference/generated/numpy.ma.average.html
Expand All @@ -540,7 +542,7 @@ def _compute(cubes: list, *, operator: iris.analysis.Aggregator, **kwargs):
method=cell_method.method,
coords=cell_method.coord_names,
intervals=cell_method.intervals,
comments=f'input_cubes: {len(cubes)}')
)
result_cube.add_cell_method(updated_method)
return result_cube

Expand Down Expand Up @@ -596,27 +598,26 @@ def _multicube_statistics(
# Calculate statistics
statistics_cubes = {}
lazy_input = any(cube.has_lazy_data() for cube in cubes)
for stat in statistics:
(stat_id, result_cube) = _compute_statistic(cubes, lazy_input, stat)
combined_cube = None
for statistic in statistics:
stat_id = _get_stat_identifier(statistic)
logger.debug('Multicube statistics: computing: %s', stat_id)

(operator, kwargs) = _get_operator_and_kwargs(statistic)
(agg, agg_kwargs) = get_iris_aggregator(operator, **kwargs)
if lazy_input and agg.lazy_func is not None:
if combined_cube is None:
# Merge input cubes only once as this can be computationally
# expensive.
combined_cube = _combine(cubes)
result_cube = _compute(combined_cube, operator=agg, **agg_kwargs)
else:
result_cube = _compute_eager(cubes, operator=agg, **agg_kwargs)
statistics_cubes[stat_id] = result_cube

return statistics_cubes


def _compute_statistic(cubes, lazy_input, statistic):
"""Compute a single statistic."""
stat_id = _get_stat_identifier(statistic)
logger.debug('Multicube statistics: computing: %s', stat_id)

(operator, kwargs) = _get_operator_and_kwargs(statistic)
(agg, agg_kwargs) = get_iris_aggregator(operator, **kwargs)
if lazy_input and agg.lazy_func is not None:
result_cube = _compute(cubes, operator=agg, **agg_kwargs)
else:
result_cube = _compute_eager(cubes, operator=agg, **agg_kwargs)
return (stat_id, result_cube)


def _multiproduct_statistics(
products,
statistics,
Expand Down