From ac680eda733f975a335f5296a64907adb233cbeb Mon Sep 17 00:00:00 2001 From: Kevin Oberlies Date: Tue, 12 Nov 2024 10:57:34 -0800 Subject: [PATCH] feat: Add ability to run esrallyd inside docker [ES-9146] (#1885) This will have esrallyd wait for all child processes to finish if it detects it's running inside docker. I'm also adding a json logging library to easily enable sending rally logs to a kibana cluster with filebeat --- docker/Dockerfiles/dev/Dockerfile | 2 +- docker/Dockerfiles/release/Dockerfile | 2 +- docs/configuration.rst | 48 +++++++++++++++++++++++++++ esrally/rally.py | 5 ++- esrally/rallyd.py | 14 ++++++-- esrally/utils/process.py | 27 ++++++++++++++- pyproject.toml | 2 ++ 7 files changed, 92 insertions(+), 8 deletions(-) diff --git a/docker/Dockerfiles/dev/Dockerfile b/docker/Dockerfiles/dev/Dockerfile index 9bc3ceefc..ac98d8c2a 100644 --- a/docker/Dockerfiles/dev/Dockerfile +++ b/docker/Dockerfiles/dev/Dockerfile @@ -8,7 +8,7 @@ ENV RALLY_RUNNING_IN_DOCKER=True USER root RUN apk update -RUN apk add curl git gcc pigz bash zstd bzip2 gzip openssh +RUN apk add curl git gcc pigz bash zstd bzip2 gzip openssh procps # pbzip2 doesn't have a package for wolfi, so we build it from source RUN apk add bzip2-dev make wget diff --git a/docker/Dockerfiles/release/Dockerfile b/docker/Dockerfiles/release/Dockerfile index ef14479a3..413456437 100644 --- a/docker/Dockerfiles/release/Dockerfile +++ b/docker/Dockerfiles/release/Dockerfile @@ -7,7 +7,7 @@ ENV RALLY_RUNNING_IN_DOCKER=True USER root RUN apk update -RUN apk add curl git gcc pigz bash zstd bzip2 gzip openssh +RUN apk add curl git gcc pigz bash zstd bzip2 gzip openssh procps # pbzip2 doesn't have a package for wolfi, so we build it from source RUN apk add bzip2-dev make wget diff --git a/docs/configuration.rst b/docs/configuration.rst index 17de5a0c7..9f136a38f 100644 --- a/docs/configuration.rst +++ b/docs/configuration.rst @@ -284,3 +284,51 @@ Here is an example of a logging configuration that uses ``${LOG_PATH}``:: } } } + + +Example +~~~~~~~ + +With the following configuration Rally will log to ``~/.rally/logs/rally.log`` and ``~/.rally/logs/rally.json``, the +latter being a JSON file:: + + { + "version": 1, + "formatters": { + "normal": { + "format": "%(asctime)s,%(msecs)d %(actorAddress)s/PID:%(process)d %(name)s %(levelname)s %(message)s", + "datefmt": "%Y-%m-%d %H:%M:%S", + "()": "esrally.log.configure_utc_formatter" + }, + "json": { + "datefmt": "%Y-%m-%d %H:%M:%S", + "class": "pythonjsonlogger.jsonlogger.JsonFormatter" + } + }, + "handlers": { + "rally_log_handler": { + "()": "esrally.log.configure_file_handler", + "filename": "${LOG_PATH}/rally.log", + "encoding": "UTF-8", + "formatter": "normal" + }, + "rally_json_handler": { + "()": "esrally.log.configure_file_handler", + "filename": "${LOG_PATH}/rally.json", + "encoding": "UTF-8", + "formatter": "json" + } + }, + "root": { + "handlers": ["rally_log_handler", "rally_json_handler"], + "level": "INFO" + }, + "loggers": { + "elasticsearch": { + "handlers": ["rally_log_handler", "rally_json_handler"], + "level": "WARNING", + "propagate": false + } + } + } + diff --git a/esrally/rally.py b/esrally/rally.py index 29d526ea9..7fb1e7934 100644 --- a/esrally/rally.py +++ b/esrally/rally.py @@ -1287,10 +1287,9 @@ def _trap(function, path, exc_info): logger.info("Cleaning track dependency directory [%s]...", paths.libs()) if sys.version_info.major == 3 and sys.version_info.minor <= 11: - # pylint: disable=deprecated-argument - shutil.rmtree(paths.libs(), onerror=_trap) + shutil.rmtree(paths.libs(), onerror=_trap) # pylint: disable=deprecated-argument, disable=useless-suppression else: - shutil.rmtree(paths.libs(), onexc=_trap_exc) + shutil.rmtree(paths.libs(), onexc=_trap_exc) # pylint: disable=unexpected-keyword-arg, disable=useless-suppression result = dispatch_sub_command(arg_parser, args, cfg) diff --git a/esrally/rallyd.py b/esrally/rallyd.py index d8bfd8c7a..fecef88e5 100644 --- a/esrally/rallyd.py +++ b/esrally/rallyd.py @@ -17,6 +17,7 @@ import argparse import logging +import os import sys import time @@ -30,14 +31,23 @@ log, version, ) -from esrally.utils import console +from esrally.utils import console, process def start(args): if actor.actor_system_already_running(): raise exceptions.RallyError("An actor system appears to be already running.") actor.bootstrap_actor_system(local_ip=args.node_ip, coordinator_ip=args.coordinator_ip) - console.info("Successfully started actor system on node [%s] with coordinator node IP [%s]." % (args.node_ip, args.coordinator_ip)) + console.info(f"Successfully started actor system on node [{args.node_ip}] with coordinator node IP [{args.coordinator_ip}].") + + if console.RALLY_RUNNING_IN_DOCKER: + console.info(f"Running with PID: {os.getpid()}") + while process.wait_for_child_processes( + callback=lambda process: console.info(f"Actor with PID [{process.pid}] terminated with status [{process.returncode}]."), + list_callback=lambda children: console.info(f"Waiting for child processes ({[p.pid for p in children]}) to terminate..."), + ): + pass + console.info("All actors terminated, exiting.") def stop(raise_errors=True): diff --git a/esrally/utils/process.py b/esrally/utils/process.py index b6d8d9f1c..ee9ff41c3 100644 --- a/esrally/utils/process.py +++ b/esrally/utils/process.py @@ -20,7 +20,7 @@ import shlex import subprocess import time -from typing import IO, Callable, List, Mapping, Optional, Union +from typing import IO, Callable, Iterable, List, Mapping, Optional, Union import psutil @@ -229,3 +229,28 @@ def rally_process(p: psutil.Process) -> bool: ) kill_all(rally_process) + + +def wait_for_child_processes( + timeout: Optional[float] = None, + callback: Optional[Callable[[psutil.Process], None]] = None, + list_callback: Optional[Callable[[Iterable[psutil.Process]], None]] = None, +) -> bool: + """ + Waits for all child processes to terminate. + + :param timeout: The maximum time to wait for child processes to terminate (default: None). + :param callback: A callback to call as each child process terminates. + The callback will be passed the PID and the return code of the child process. + :param list_callback: A callback to tell caller about the child processes that are being waited for. + + :return: False if no child processes found, True otherwise. + """ + current = psutil.Process() + children = current.children(recursive=True) + if not children: + return False + if list_callback is not None: + list_callback(children) + psutil.wait_procs(children, timeout=timeout, callback=callback) + return True diff --git a/pyproject.toml b/pyproject.toml index 98694685c..557c6b986 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -84,6 +84,8 @@ dependencies = [ "zstandard==0.21.0", # License: Python Software Foundation License "typing-extensions==4.12.2", + # License: BSD-2-Clause license + "python-json-logger==2.0.7", ] [project.optional-dependencies]