From 75c80706734a44318dd0b75fbeca8eb2fc51134e Mon Sep 17 00:00:00 2001 From: Kevin Oberlies Date: Fri, 27 Sep 2024 13:02:19 -0700 Subject: [PATCH] [ES-8436] feat: Change Dockerfile base images to Wolfi (#1871) * feat: Change Dockerfile base images to Wolfi Running the docker image with a mounted `/rally/.rally` directory that already contains the configuration file didn't work. So I had to change how we parse the logging config and rally.ini files. Now we delay the parsing of the environment variables to read time, to make running from docker and outside of docker compatible with each other. --- docker/Dockerfiles/Dockerfile-dev | 70 ++++++++++++--------------- docker/Dockerfiles/Dockerfile-release | 42 +++++++++++----- docs/configuration.rst | 45 +++++++++++++++++ docs/docker.rst | 11 +++++ esrally/config.py | 7 ++- esrally/log.py | 23 +++++++-- esrally/resources/logging.json | 8 +-- 7 files changed, 145 insertions(+), 61 deletions(-) diff --git a/docker/Dockerfiles/Dockerfile-dev b/docker/Dockerfiles/Dockerfile-dev index 4eb52c924..de506a8c0 100644 --- a/docker/Dockerfiles/Dockerfile-dev +++ b/docker/Dockerfiles/Dockerfile-dev @@ -1,51 +1,31 @@ -################################################################################ -# Build stage 0 `builder`: -# Install Rally from source inside a virtualenv -################################################################################ +FROM docker.elastic.co/wolfi/python:3.12.3-dev AS builder -FROM python:3.8.13-slim-bullseye as builder +USER root -RUN apt-get -y update && \ - apt-get install -y curl git gcc && \ - apt-get -y upgrade && \ - rm -rf /var/lib/apt/lists/* +RUN apk update +RUN apk add curl git gcc pigz bash zstd bzip2 gzip -RUN mkdir -p /rally/esrally -COPY pyproject.toml /rally/ -COPY README.md /rally/ -COPY esrally/ /rally/esrally/ - -RUN python3 -m venv /rally/venv -ENV PATH="/rally/venv/bin:$PATH" - -WORKDIR /rally -# Wipe away any lingering caches, copied over from the local machine -RUN find /rally -name "__pycache__" -exec rm -rf -- \{\} \; 2>/dev/null || true -RUN find /rally -name ".pyc" -exec rm -rf -- \{\} \; 2>/dev/null || true -RUN pip3 install --upgrade hatch hatchling pip wheel -RUN pip3 install /rally +# pbzip2 doesn't have a package for wolfi, so we build it from source +RUN apk add bzip2-dev make wget -################################################################################ -# Build stage 1 (the actual Rally image): -# Copy Rally from stage 0 and fix permissions to support randomized UIDs -# Define VOLUME for ~/.rally -# Add entrypoint -################################################################################ +RUN cd /tmp && \ + wget -q https://launchpad.net/pbzip2/1.1/1.1.13/+download/pbzip2-1.1.13.tar.gz && \ + tar -xzf pbzip2-1.1.13.tar.gz && \ + cd pbzip2-1.1.13/ && \ + make install && \ + rm -r /tmp/pbzip2-1.1.13/ -FROM python:3.8.12-slim-bullseye +FROM docker.elastic.co/wolfi/python:3.12.3-dev ARG RALLY_VERSION ARG RALLY_LICENSE -ENV RALLY_RUNNING_IN_DOCKER True -RUN apt-get -y update && \ - apt-get install -y curl git pbzip2 pigz && \ - apt-get -y upgrade && \ - rm -rf /var/lib/apt/lists/* +ENV RALLY_RUNNING_IN_DOCKER=True -RUN groupadd --gid 1000 rally && \ - useradd -d /rally -m -k /dev/null -g 1000 -N -u 1000 -l -s /bin/bash rally +USER root +COPY --from=builder /usr/bin/ /usr/bin/ -COPY --chown=1000:0 --from=builder /rally/venv /rally/venv +RUN addgroup --gid 1000 rally && \ + adduser --system --home /rally --ingroup rally --no-create-home --uid 1000 --shell /bin/bash rally WORKDIR /rally COPY --chown=1000:0 docker/bin/entrypoint.sh /entrypoint.sh @@ -57,12 +37,24 @@ RUN chgrp 0 /entrypoint.sh && \ chmod 0775 /entrypoint.sh RUN mkdir -p /rally/.rally && \ - chown -R 1000:0 /rally/.rally + chown -R 1000:0 /rally/ USER 1000 +RUN mkdir -p /rally/esrally +COPY pyproject.toml /rally/ +COPY README.md /rally/ +COPY esrally/ /rally/esrally/ + +RUN python3 -m venv /rally/venv ENV PATH=/rally/venv/bin:$PATH +# Wipe away any lingering caches, copied over from the local machine +RUN find /rally -name "__pycache__" -exec rm -rf -- \{\} \; 2>/dev/null || true +RUN find /rally -name ".pyc" -exec rm -rf -- \{\} \; 2>/dev/null || true +RUN python3 -m pip install --upgrade hatch hatchling pip wheel +RUN python3 -m pip install /rally + LABEL org.label-schema.schema-version="1.0" \ org.label-schema.vendor="Elastic" \ org.label-schema.name="rally" \ diff --git a/docker/Dockerfiles/Dockerfile-release b/docker/Dockerfiles/Dockerfile-release index 22dc13f6c..ff6be8f1c 100644 --- a/docker/Dockerfiles/Dockerfile-release +++ b/docker/Dockerfiles/Dockerfile-release @@ -1,19 +1,34 @@ -FROM python:3.8.13-slim-bullseye +FROM docker.elastic.co/wolfi/python:3.12.3-dev AS builder + +USER root + +RUN apk update +RUN apk add curl git gcc pigz bash zstd bzip2 gzip + +# pbzip2 doesn't have a package for wolfi, so we build it from source +RUN apk add bzip2-dev make wget + + +RUN cd /tmp && \ + wget -q https://launchpad.net/pbzip2/1.1/1.1.13/+download/pbzip2-1.1.13.tar.gz && \ + tar -xzf pbzip2-1.1.13.tar.gz && \ + cd pbzip2-1.1.13/ && \ + make install && \ + rm -r /tmp/pbzip2-1.1.13/ + +FROM docker.elastic.co/wolfi/python:3.12.3-dev ARG RALLY_VERSION ARG RALLY_LICENSE -ENV RALLY_RUNNING_IN_DOCKER True +ENV RALLY_RUNNING_IN_DOCKER=True -RUN apt-get -y update && \ - apt-get install -y curl git gcc pbzip2 pigz && \ - apt-get -y upgrade && \ - rm -rf /var/lib/apt/lists/* +USER root +COPY --from=builder /usr/bin/ /usr/bin/ -RUN groupadd --gid 1000 rally && \ - useradd -d /rally -m -k /dev/null -g 1000 -N -u 1000 -l -s /bin/bash rally +RUN addgroup --gid 1000 rally && \ + adduser --system --home /rally --ingroup rally --no-create-home --uid 1000 --shell /bin/bash rally -RUN pip3 install --upgrade hatch hatchling pip wheel -RUN pip3 install esrally==$RALLY_VERSION +RUN mkdir /rally/ WORKDIR /rally COPY --chown=1000:0 docker/bin/entrypoint.sh /entrypoint.sh @@ -25,12 +40,17 @@ RUN chgrp 0 /entrypoint.sh && \ chmod 0775 /entrypoint.sh RUN mkdir -p /rally/.rally && \ - chown -R 1000:0 /rally/.rally + chown -R 1000:0 /rally/ USER 1000 +RUN python3 -m venv /rally/venv ENV PATH=/rally/venv/bin:$PATH +RUN python3 -m pip install --upgrade hatch hatchling pip wheel +RUN python3 -m pip install esrally==$RALLY_VERSION + + LABEL org.label-schema.schema-version="1.0" \ org.label-schema.vendor="Elastic" \ org.label-schema.name="rally" \ diff --git a/docs/configuration.rst b/docs/configuration.rst index 57b7d8693..17de5a0c7 100644 --- a/docs/configuration.rst +++ b/docs/configuration.rst @@ -6,6 +6,10 @@ Rally Configuration Rally stores its configuration in the file ``~/.rally/rally.ini`` which is automatically created the first time Rally is executed. It comprises the following sections. +.. note:: + The configuration file can use `${CONFIG_DIR}` to refer to the directory where Rally stores its configuration files. This is useful for configuring Rally in a portable way. + This defaults to `~/.rally`, but can be overridden by setting the `RALLY_HOME` environment variable in your shell. + meta ~~~~ @@ -239,3 +243,44 @@ With the following configuration Rally will log all output to standard error:: } } } + +Portability +~~~~~~~~~~~ + +You can also use ``${LOG_PATH}`` in the ``"filename"`` value of the handler you are configuring to make the log configuration more portable. +Rally will substitute ``${LOG_PATH}`` with the path to the directory where Rally stores its log files. By default, this is ``~/.rally/logs``. +But this can be overridden by setting the ``RALLY_HOME`` environment variable in your shell, and logs will be stored in ``${RALLY_HOME}/logs``. + +NOTE:: This is only supported with the ``esrally.log.configure_file_handler`` and ``esrally.log.configure_profile_file_handler`` handlers. + +Here is an example of a logging configuration that uses ``${LOG_PATH}``:: + + { + "version": 1, + "formatters": { + "normal": { + "format": "%(asctime)s,%(msecs)d %(actorAddress)s/PID:%(process)d %(name)s %(levelname)s %(message)s", + "datefmt": "%Y-%m-%d %H:%M:%S", + "()": "esrally.log.configure_utc_formatter" + } + }, + "handlers": { + "rally_log_handler": { + "()": "esrally.log.configure_file_handler", # <-- use configure_file_handler or configure_profile_file_handler + "filename": "${LOG_PATH}/rally.log", # <-- use ${LOG_PATH} here + "encoding": "UTF-8", + "formatter": "normal" + } + }, + "root": { + "handlers": ["rally_log_handler"], + "level": "INFO" + }, + "loggers": { + "elasticsearch": { + "handlers": ["rally_log_handler"], + "level": "WARNING", + "propagate": false + } + } + } diff --git a/docs/docker.rst b/docs/docker.rst index 2bcc5105f..9bf42348b 100644 --- a/docs/docker.rst +++ b/docs/docker.rst @@ -70,6 +70,17 @@ To customize Rally you can create your own ``rally.ini`` and bind mount it using docker run -v /home//custom_rally.ini:/rally/.rally/rally.ini elastic/rally ... +As a quality of life improvement, the ``rally.ini`` and ``logging.json`` files can use ``${CONFIG_DIR}`` in ``rally.ini`` and ``${LOG_PATH}`` in ``logging.json`` in order to +make the files more portable. For example: + +* In ``rally.ini``, you can set ``root.dir = ${CONFIG_DIR}/benchmarks`` instead of hard-coding the path as ``/rally/.rally/benchmarks`` +* In ``logging.json``, you can set ``"filename": "${LOG_PATH}/rally.log"`` instead of hard-coding the path as ``"filename": "/rally/.rally/logs/rally.log"`` + +These files can then be used with the docker image, with the entire local ``~/.rally`` directory mounted as follows:: + + docker run -v type=bind,source=$HOME/.rally,target=/rally/.rally elastic/rally ... + + Persistence ----------- diff --git a/esrally/config.py b/esrally/config.py index 3c4513a5b..9f6c35cbd 100644 --- a/esrally/config.py +++ b/esrally/config.py @@ -52,7 +52,10 @@ def present(self): def load(self) -> configparser.ConfigParser: config = configparser.ConfigParser() - config.read(self.location, encoding="utf-8") + with open(self.location, encoding="utf-8") as src: + contents = src.read() + contents = Template(contents).substitute(CONFIG_DIR=self.config_dir) + config.read_string(contents, source=self.location) return config def store_default_config(self, template_path=None): @@ -64,7 +67,7 @@ def store_default_config(self, template_path=None): with open(self.location, "w", encoding="utf-8") as target: with open(source_path, encoding="utf-8") as src: contents = src.read() - target.write(Template(contents).substitute(CONFIG_DIR=self.config_dir)) + target.write(contents) def store(self, config: configparser.ConfigParser): io.ensure_dir(self.config_dir) diff --git a/esrally/log.py b/esrally/log.py index a0381b8cf..d6b547676 100644 --- a/esrally/log.py +++ b/esrally/log.py @@ -95,16 +95,29 @@ def install_default_log_config(): source_path = io.normalize_path(os.path.join(os.path.dirname(__file__), "resources", "logging.json")) with open(log_config, "w", encoding="UTF-8") as target: with open(source_path, encoding="UTF-8") as src: - # Ensure we have a trailing path separator as after LOG_PATH there will only be the file name - log_path = os.path.join(paths.logs(), "") - # the logging path might contain backslashes that we need to escape - log_path = io.escape_path(log_path) - contents = src.read().replace("${LOG_PATH}", log_path) + contents = src.read() target.write(contents) add_missing_loggers_to_config() io.ensure_dir(paths.logs()) +# pylint: disable=unused-argument +def configure_file_handler(*args, **kwargs) -> logging.Handler: + """ + Configures the WatchedFileHandler supporting expansion of `~` and `${LOG_PATH}` to the user's home and the log path respectively. + """ + filename = kwargs.pop("filename").replace("${LOG_PATH}", paths.logs()) + return logging.handlers.WatchedFileHandler(filename=filename, encoding=kwargs["encoding"], delay=kwargs.get("delay", False)) + + +def configure_profile_file_handler(*args, **kwargs) -> logging.Handler: + """ + Configures the FileHandler supporting expansion of `~` and `${LOG_PATH}` to the user's home and the log path respectively. + """ + filename = kwargs.pop("filename").replace("${LOG_PATH}", paths.logs()) + return logging.FileHandler(filename=filename, encoding=kwargs["encoding"], delay=kwargs.get("delay", False)) + + def load_configuration(): """ Loads the logging configuration. This is a low-level method and usually diff --git a/esrally/resources/logging.json b/esrally/resources/logging.json index ba559e235..ee98cef08 100644 --- a/esrally/resources/logging.json +++ b/esrally/resources/logging.json @@ -19,15 +19,15 @@ }, "handlers": { "rally_log_handler": { - "class": "logging.handlers.WatchedFileHandler", - "filename": "${LOG_PATH}rally.log", + "()": "esrally.log.configure_file_handler", + "filename": "${LOG_PATH}/rally.log", "encoding": "UTF-8", "formatter": "normal", "filters": ["isActorLog"] }, "rally_profile_handler": { - "class": "logging.FileHandler", - "filename": "${LOG_PATH}profile.log", + "()": "esrally.log.configure_profile_file_handler", + "filename": "${LOG_PATH}/profile.log", "delay": true, "encoding": "UTF-8", "formatter": "profile"