diff --git a/.flake8 b/.flake8
index 855c8d8a..03e9737e 100644
--- a/.flake8
+++ b/.flake8
@@ -35,3 +35,4 @@ exclude =
     build
     esmf_regrid/__init__.py
     esmf_regrid/tests/results
+    benchmarks/*
diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml
index 9f00653a..d71bce37 100644
--- a/.github/workflows/benchmark.yml
+++ b/.github/workflows/benchmark.yml
@@ -73,9 +73,7 @@ jobs:
       - name: Benchmark script
         run: |
           if ${{ github.event_name != 'pull_request' }}; then export COMPARE="HEAD~"; else export COMPARE="origin/${{ github.base_ref }}"; fi;
-          nox --session=tests --install-only
-          export DATA_GEN_PYTHON=$(realpath $(find .nox -path "*tests/bin/python"))
-          nox --session="benchmarks(branch)" -- "${COMPARE}"
+          python benchmarks/bm_runner.py branch ${COMPARE}

       - name: Archive ASV results
         uses: actions/upload-artifact@v4
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 17bc299e..c5daaeec 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,6 +5,15 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](http://keepachangelog.com/)
 and this project adheres to [Semantic Versioning](http://semver.org/).

+## [Unreleased]
+
+### Changed
+
+- [PR#361](https://github.com/SciTools-incubator/iris-esmf-regrid/pull/361)
+  Moved the code for running benchmarks to `bm_runner.py`, in line with Iris
+  benchmarks.
+  [@stephenworsley](https://github.com/stephenworsley)
+
 ## [0.9] - 2023-11-03

 ### Added
diff --git a/benchmarks/README.md b/benchmarks/README.md
index aa7ec2ac..6c71eeec 100644
--- a/benchmarks/README.md
+++ b/benchmarks/README.md
@@ -16,16 +16,27 @@ raising a ❌ failure.
 installed, as well as Nox (see
 [Benchmark environments](#benchmark-environments)).

-[iris-esmf-regrid's noxfile](../noxfile.py) includes a `benchmarks` session
-that provides conveniences for setting up before benchmarking, and can also
-replicate the CI run locally. See the session docstring for detail.
+The benchmark runner ([bm_runner.py](./bm_runner.py)) provides conveniences for
+common benchmark setup and run tasks, including replicating the automated
+overnight run locally. See `python bm_runner.py --help` for detail.
+
+A significant portion of benchmark run time is environment management. Run time
+can be reduced by placing the benchmark environment on the same file system as
+your
+[Conda package cache](https://conda.io/projects/conda/en/latest/user-guide/configuration/use-condarc.html#specify-pkg-directories),
+if it is not already. You can achieve this by either:
+
+- Temporarily reconfiguring `delegated_env_commands` and `delegated_env_parent`
+  in [asv.conf.json](asv.conf.json) to reference a location on the same file
+  system as the Conda package cache.
+- Moving your iris-esmf-regrid repo to the same file system as the Conda package cache.

 ### Environment variables

 * `DATA_GEN_PYTHON` - required - path to a Python executable that can be
 used to generate benchmark test objects/files; see
-[Data generation](#data-generation). The Nox session sets this automatically,
-but will defer to any value already set in the shell.
+[Data generation](#data-generation). The benchmark runner sets this
+automatically, but will defer to any value already set in the shell.
 * `BENCHMARK_DATA` - optional - path to a directory for benchmark synthetic
 test data, which the benchmark scripts will create if it doesn't already
 exist. Defaults to `/benchmarks/.data/` if not set. Note that some of
@@ -34,7 +45,7 @@ plan accordingly.
 * `ON_DEMAND_BENCHMARKS` - optional - when set (to any value): benchmarks
 decorated with `@on_demand_benchmark` are included in the ASV run. Usually
 coupled with the ASV `--bench` argument to only run the benchmark(s) of
-interest. Is set during the Nox `sperf` session.
+interest. Is set during the benchmark runner `sperf` sub-command.

 ### Reducing run time
diff --git a/benchmarks/asv_delegated_conda.py b/benchmarks/asv_delegated_conda.py
index 30810309..ab2478e3 100644
--- a/benchmarks/asv_delegated_conda.py
+++ b/benchmarks/asv_delegated_conda.py
@@ -189,6 +189,11 @@ def copy_asv_files(src_parent: Path, dst_parent: Path) -> None:
         # Record new environment information in properties.
         self._update_info()

+    def _run_conda(self, args, env=None):
+        # TODO: remove after airspeed-velocity/asv#1397 is merged and released.
+        args = ["--yes" if arg == "--force" else arg for arg in args]
+        return super()._run_conda(args, env)
+
     def checkout_project(self, repo: Repo, commit_hash: str) -> None:
         """Check out the working tree of the project at given commit hash."""
         super().checkout_project(repo, commit_hash)
diff --git a/benchmarks/bm_runner.py b/benchmarks/bm_runner.py
new file mode 100644
index 00000000..8588cd22
--- /dev/null
+++ b/benchmarks/bm_runner.py
@@ -0,0 +1,314 @@
+"""Argparse conveniences for executing common types of benchmark runs."""
+
+from abc import ABC, abstractmethod
+import argparse
+from argparse import ArgumentParser
+from datetime import datetime
+from importlib import import_module
+from os import environ
+from pathlib import Path
+import re
+import shlex
+import subprocess
+from tempfile import NamedTemporaryFile
+from typing import Literal
+
+# The threshold beyond which shifts are 'notable'. See `asv compare` docs
+# for more.
+COMPARE_FACTOR = 1.2
+
+BENCHMARKS_DIR = Path(__file__).parent
+ROOT_DIR = BENCHMARKS_DIR.parent
+# Storage location for reports used in GitHub Actions.
+GH_REPORT_DIR = ROOT_DIR.joinpath(".github", "workflows", "benchmark_reports")
+
+# Common ASV arguments for all run_types except `custom`.
+ASV_HARNESS = "run {posargs} --attribute rounds=4 --interleave-rounds --show-stderr"
+
+
+def _echo(echo_string: str):
+    # Use subprocess for printing to reduce chance of printing out of sequence
+    # with the subsequent calls.
+    subprocess.run(["echo", f"BM_RUNNER DEBUG: {echo_string}"])
+
+
+def _subprocess_runner(args, asv=False, **kwargs):
+    # Avoid permanent modifications if the same arguments are used more than once.
+    args = args.copy()
+    kwargs = kwargs.copy()
+    if asv:
+        args.insert(0, "asv")
+        kwargs["cwd"] = BENCHMARKS_DIR
+    _echo(" ".join(args))
+    kwargs.setdefault("check", True)
+    return subprocess.run(args, **kwargs)
+
+
+def _subprocess_runner_capture(args, **kwargs) -> str:
+    result = _subprocess_runner(args, capture_output=True, **kwargs)
+    return result.stdout.decode().rstrip()
+
+
+def _check_requirements(package: str) -> None:
+    try:
+        import_module(package)
+    except ImportError as exc:
+        message = (
+            f"No {package} install detected. Benchmarks can only "
+            f"be run in an environment including {package}."
+        )
+        raise Exception(message) from exc
+
+
+def _prep_data_gen_env() -> None:
+    """Create or access a separate, unchanging environment for generating test data."""
+    python_version = "3.10"
+    data_gen_var = "DATA_GEN_PYTHON"
+    if data_gen_var in environ:
+        _echo("Using existing data generation environment.")
+    else:
+        _echo("Setting up the data generation environment ...")
+        # Get Nox to build an environment for the `tests` session, but don't
+        # run the session. Will reuse a cached environment if appropriate.
+        _subprocess_runner(
+            [
+                "nox",
+                f"--noxfile={ROOT_DIR / 'noxfile.py'}",
+                "--session=tests",
+                "--install-only",
+                f"--python={python_version}",
+            ]
+        )
+        # Find the environment built above, set it to be the data generation
+        # environment.
+        data_gen_python = next(
+            (ROOT_DIR / ".nox").rglob(f"tests*/bin/python{python_version}")
+        ).resolve()
+        environ[data_gen_var] = str(data_gen_python)
+
+    _echo("Data generation environment ready.")
+
+
+def _setup_common() -> None:
+    _check_requirements("asv")
+    _check_requirements("nox")
+
+    _prep_data_gen_env()
+
+    _echo("Setting up ASV ...")
+    _subprocess_runner(["machine", "--yes"], asv=True)
+
+    _echo("Setup complete.")
+
+
+def _asv_compare(*commits: str) -> None:
+    """Run through a list of commits comparing each one to the next."""
+    commits = [commit[:8] for commit in commits]
+    for i in range(len(commits) - 1):
+        before = commits[i]
+        after = commits[i + 1]
+        asv_command = shlex.split(
+            f"compare {before} {after} --factor={COMPARE_FACTOR} --split"
+        )
+
+        comparison = _subprocess_runner_capture(asv_command, asv=True)
+        _echo(comparison)
+        shifts = _subprocess_runner_capture([*asv_command, "--only-changed"], asv=True)
+        _echo(shifts)
+
+
+class _SubParserGenerator(ABC):
+    """Convenience for holding all the necessary argparse info in one place."""
+
+    name: str = NotImplemented
+    description: str = NotImplemented
+    epilog: str = NotImplemented
+
+    def __init__(self, subparsers: ArgumentParser.add_subparsers) -> None:
+        self.subparser: ArgumentParser = subparsers.add_parser(
+            self.name,
+            description=self.description,
+            epilog=self.epilog,
+            formatter_class=argparse.RawTextHelpFormatter,
+        )
+        self.add_arguments()
+        self.add_asv_arguments()
+        self.subparser.set_defaults(func=self.func)
+
+    @abstractmethod
+    def add_arguments(self) -> None:
+        """All custom self.subparser.add_argument() calls."""
+        _ = NotImplemented
+
+    def add_asv_arguments(self) -> None:
+        self.subparser.add_argument(
+            "asv_args",
+            nargs=argparse.REMAINDER,
+            help="Any number of arguments to pass down to the ASV benchmark command.",
+        )
+
+    @staticmethod
+    @abstractmethod
+    def func(args: argparse.Namespace):
+        """Return when the subparser is parsed.
+
+        `func` is then called, performing the user's selected sub-command.
+
+        """
+        _ = args
+        return NotImplemented
+
+
+class Branch(_SubParserGenerator):
+    """Class for parsing and running the 'branch' argument."""
+
+    name = "branch"
+    description = (
+        "Benchmarks two commits: ``HEAD``, and ``HEAD``'s merge-base with the "
+        "input **base_branch**. If running on GitHub Actions: HEAD will be "
+        "GitHub's merge commit and merge-base will be the merge target. Performance "
+        "comparisons will be posted in the CI run, which will fail if regressions "
+        "exceed the tolerance.\n"
+        "Uses `asv run`."
+    )
+    epilog = (
+        "e.g. python bm_runner.py branch upstream/main\n"
+        "e.g. python bm_runner.py branch upstream/main --bench=regridding"
+    )
+
+    def add_arguments(self) -> None:
+        self.subparser.add_argument(
+            "base_branch",
+            type=str,
+            help="A branch that has the merge-base with ``HEAD`` - ``HEAD`` will be benchmarked against that merge-base.",
+        )
+
+    @staticmethod
+    def func(args: argparse.Namespace) -> None:
+        _setup_common()
+
+        git_command = shlex.split("git rev-parse HEAD")
+        head_sha = _subprocess_runner_capture(git_command)[:8]
+
+        git_command = shlex.split(f"git merge-base {head_sha} {args.base_branch}")
+        merge_base = _subprocess_runner_capture(git_command)[:8]
+
+        with NamedTemporaryFile("w") as hashfile:
+            hashfile.writelines([merge_base, "\n", head_sha])
+            hashfile.flush()
+            commit_range = f"HASHFILE:{hashfile.name}"
+            asv_command = shlex.split(ASV_HARNESS.format(posargs=commit_range))
+            _subprocess_runner([*asv_command, *args.asv_args], asv=True)
+
+        _asv_compare(merge_base, head_sha)
+
+
+class SPerf(_SubParserGenerator):
+    """Class for parsing and running the 'sperf' argument."""
+
+    name = "sperf"
+    description = (
+        "Run the on-demand SPerf suite of benchmarks (measuring "
+        "scalability) for the ``HEAD`` of ``upstream/main`` only, "
+        "and publish the results to the input **publish_dir**, within a "
+        "unique subdirectory for this run.\n"
+        "Uses `asv run`."
+    )
+    epilog = (
+        "e.g. python bm_runner.py sperf my_publish_dir\n"
+        "e.g. python bm_runner.py sperf my_publish_dir --bench=regridding"
+    )
+
+    def add_arguments(self) -> None:
+        self.subparser.add_argument(
+            "publish_dir",
+            type=str,
+            help="HTML results will be published to a sub-dir in this dir.",
+        )
+
+    @staticmethod
+    def func(args: argparse.Namespace) -> None:
+        _setup_common()
+
+        publish_dir = Path(args.publish_dir)
+        if not publish_dir.is_dir():
+            message = f"Input 'publish directory' is not a directory: {publish_dir}"
+            raise NotADirectoryError(message)
+        publish_subdir = (
+            publish_dir / f"sperf_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
+        )
+        publish_subdir.mkdir()
+
+        # Activate on demand benchmarks (C/SPerf are deactivated for
+        # 'standard' runs).
+        environ["ON_DEMAND_BENCHMARKS"] = "True"
+        commit_range = "upstream/main^!"
+
+        asv_command = (
+            ASV_HARNESS.format(posargs=commit_range) + " --bench=.*Scalability.*"
+        )
+
+        # Only do a single round.
+        asv_command = shlex.split(re.sub(r"rounds=\d", "rounds=1", asv_command))
+        try:
+            _subprocess_runner([*asv_command, *args.asv_args], asv=True)
+        except subprocess.CalledProcessError as err:
+            # C/SPerf benchmarks are much bigger than the CI ones:
+            # Don't fail the whole run if memory blows on 1 benchmark.
+            # ASV produces return code of 2 if the run includes crashes.
+            if err.returncode != 2:
+                raise
+
+        asv_command = shlex.split(f"publish {commit_range} --html-dir={publish_subdir}")
+        _subprocess_runner(asv_command, asv=True)
+
+        # Print completion message.
+        location = BENCHMARKS_DIR / ".asv"
+        _echo(
+            f'New ASV results for "sperf".\n'
+            f'See "{publish_subdir}",'
+            f'\n or JSON files under "{location / "results"}".'
+        )
+
+
+class Custom(_SubParserGenerator):
+    """Class for parsing and running the 'custom' argument."""
+
+    name = "custom"
+    description = (
+        "Run ASV with the input **ASV sub-command**, without any preset "
+        "arguments - must all be supplied by the user. So just like running "
+        "ASV manually, with the convenience of re-using the runner's "
+        "scripted setup steps."
+    )
+    epilog = "e.g. python bm_runner.py custom continuous a1b23d4 HEAD --quick"
+
+    def add_arguments(self) -> None:
+        self.subparser.add_argument(
+            "asv_sub_command",
+            type=str,
+            help="The ASV sub-command to run.",
+        )
+
+    @staticmethod
+    def func(args: argparse.Namespace) -> None:
+        _setup_common()
+        _subprocess_runner([args.asv_sub_command, *args.asv_args], asv=True)
+
+
+def main():
+    parser = ArgumentParser(
+        description="Run the iris-esmf-regrid performance benchmarks (using Airspeed Velocity).",
+        epilog="More help is available within each sub-command.",
+    )
+    subparsers = parser.add_subparsers(required=True)
+
+    for gen in (Branch, SPerf, Custom):
+        _ = gen(subparsers).subparser
+
+    parsed = parser.parse_args()
+    parsed.func(parsed)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/noxfile.py b/noxfile.py
index 58b1436b..2cefbf00 100644
--- a/noxfile.py
+++ b/noxfile.py
@@ -5,11 +5,9 @@
 """

-from datetime import datetime
 import os
 from pathlib import Path
 import shutil
-from typing import Literal
 from urllib.error import HTTPError
 from urllib.parse import urlparse
 from urllib.request import urlopen
@@ -313,177 +311,6 @@ def tests(session: nox.sessions.Session):
     session.run("pytest")


-@nox.session
-@nox.parametrize(
-    "run_type",
-    ["branch", "sperf", "custom"],
-    ids=["branch", "sperf", "custom"],
-)
-def benchmarks(
-    session: nox.sessions.Session,
-    run_type: Literal["overnight", "branch", "sperf", "custom"],
-):
-    """
-    Perform iris-esmf-regrid performance benchmarks (using Airspeed Velocity).
-
-    All run types require a single Nox positional argument (e.g.
-    ``nox --session="foo" -- my_pos_arg``) - detailed in the parameters
-    section - and can optionally accept a series of further arguments that will
-    be added to session's ASV command.
-
-    Parameters
-    ----------
-    session: object
-        A `nox.sessions.Session` object.
-    run_type: {"branch", "sperf", "custom"}
-        * ``branch``: compares ``HEAD`` and ``HEAD``'s merge-base with the
-          input **base branch**. Fails if a performance regression is detected.
-          This is the session used by IER's CI.
-        * ``sperf``: Run the on-demand SPerf suite of benchmarks (part of the
-          UK Met Office NG-VAT project) for the ``HEAD`` of ``upstream/main``
-          only, and publish the results to the input **publish directory**,
-          within a unique subdirectory for this run.
-        * ``custom``: run ASV with the input **ASV sub-command**, without any
-          preset arguments - must all be supplied by the user. So just like
-          running ASV manually, with the convenience of re-using the session's
-          scripted setup steps.
-
-    Examples
-    --------
-    * ``nox --session="benchmarks(branch)" -- upstream/main``
-    * ``nox --session="benchmarks(branch)" -- upstream/mesh-data-model``
-    * ``nox --session="benchmarks(branch)" -- upstream/main --bench=ci``
-    * ``nox --session="benchmarks(sperf)" -- my_publish_dir
-    * ``nox --session="benchmarks(custom)" -- continuous a1b23d4 HEAD --quick``
-
-    """
-    # Make sure we're not working with a list of Python versions.
-    if not isinstance(PY_VER, str):
-        message = (
-            "benchmarks session requires PY_VER to be a string - representing "
-            f"a single Python version - instead got: {type(PY_VER)} ."
-        )
-        raise ValueError(message)
-
-    # The threshold beyond which shifts are 'notable'. See `asv compare`` docs
-    # for more.
- COMPARE_FACTOR = 2.0 - - session.install("asv", "nox", "pyyaml") - session.run("conda", "install", "--yes", "conda<24.3") - - data_gen_var = "DATA_GEN_PYTHON" - if data_gen_var in os.environ: - print("Using existing data generation environment.") - data_gen_python = Path(os.environ[data_gen_var]) - else: - print("Setting up the data generation environment...") - # Get Nox to build an environment for the `tests` session, but don't - # run the session. Will re-use a cached environment if appropriate. - session.run_always( - "nox", - "--session=tests", - "--install-only", - f"--python={PY_VER}", - ) - # Find the environment built above, set it to be the data generation - # environment. - data_gen_python = next( - Path(".nox").rglob(f"tests*/bin/python{PY_VER}") - ).resolve() - session.env[data_gen_var] = data_gen_python - - print("Running ASV...") - session.cd("benchmarks") - # Skip over setup questions for a new machine. - session.run("asv", "machine", "--yes") - - # All run types require one Nox posarg. - run_type_arg = { - "branch": "base branch", - "sperf": "publish directory", - "custom": "ASV sub-command", - } - if run_type not in run_type_arg.keys(): - message = f"Unsupported run-type: {run_type}" - raise NotImplementedError(message) - if not session.posargs: - message = ( - f"Missing mandatory first Nox session posarg: " f"{run_type_arg[run_type]}" - ) - raise ValueError(message) - first_arg = session.posargs[0] - # Optional extra arguments to be passed down to ASV. - asv_args = session.posargs[1:] - - if run_type == "branch": - base_branch = first_arg - git_command = f"git merge-base HEAD {base_branch}" - merge_base = session.run(*git_command.split(" "), silent=True, external=True)[ - :8 - ] - - try: - asv_command = [ - "asv", - "continuous", - merge_base, - "HEAD", - f"--factor={COMPARE_FACTOR}", - ] - session.run(*asv_command, *asv_args) - finally: - asv_command = [ - "asv", - "compare", - merge_base, - "HEAD", - f"--factor={COMPARE_FACTOR}", - "--split", - ] - session.run(*asv_command) - - elif run_type == "sperf": - publish_dir = Path(first_arg) - if not publish_dir.is_dir(): - message = f"Input 'publish directory' is not a directory: {publish_dir}" - raise NotADirectoryError(message) - publish_subdir = ( - publish_dir / f"{run_type}_{datetime.now().strftime('%Y%m%d_%H%M%S')}" - ) - publish_subdir.mkdir() - - # Activate on demand benchmarks (C/SPerf are deactivated for 'standard' runs). - session.env["ON_DEMAND_BENCHMARKS"] = "True" - commit_range = "upstream/main^!" - - asv_command = [ - "asv", - "run", - commit_range, - "--bench=.*Scalability.*", - "--attribute", - "rounds=1", - ] - session.run(*asv_command, *asv_args) - - asv_command = ["asv", "publish", commit_range, f"--html-dir={publish_subdir}"] - session.run(*asv_command) - - # Print completion message. - location = Path().cwd() / ".asv" - print( - f'New ASV results for "{run_type}".\n' - f'See "{publish_subdir}",' - f'\n or JSON files under "{location / "results"}".' - ) - - else: - asv_subcommand = first_arg - assert run_type == "custom" - session.run("asv", asv_subcommand, *asv_args) - - @nox.session(python=PY_VER, venv_backend="conda") def wheel(session: nox.sessions.Session): """
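For reference (not part of the patch): the invocations this change enables, assembled from the epilog examples in `bm_runner.py` and the updated workflow step. The `--bench` filter, commit hash, and publish directory below are illustrative placeholders only.

    # What the benchmark workflow now runs (COMPARE is HEAD~ or origin/<base_ref>):
    python benchmarks/bm_runner.py branch ${COMPARE}

    # Local runs, from the benchmarks/ directory:
    python bm_runner.py branch upstream/main --bench=regridding
    python bm_runner.py sperf my_publish_dir
    python bm_runner.py custom continuous a1b23d4 HEAD --quick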