Skip to content

Commit

Permalink
Update container to work with poetry (#272)
Browse files Browse the repository at this point in the history
This PR:

* Creates two Dockerfiles - one for the ingest and one for the import
* Switches the ingest Dockerfile to work with Poetry
* Switches the ingest Dockerfile to a multistage build to better build
`eccodes` and to avoid having build tools in the final image
* Reduces the production image size from 3.5 GB to 604 MB
* Updates the Docker Compose file to work with the new Dockerfiles
* Updates READMEs to reflect the above changes.

The test suite does run. I downloaded the test tarball to my home
directory and was able to do something like:

```console
$ data=$HOME/path/to/tarball public=$HOME/path/to/tarball docker compose run test
```
  • Loading branch information
ian-noaa authored Dec 14, 2023
2 parents f0a4b0f + d82b129 commit c2d19e4
Show file tree
Hide file tree
Showing 7 changed files with 280 additions and 130 deletions.
10 changes: 10 additions & 0 deletions .dockerignore
Original file line number Diff line number Diff line change
@@ -1,16 +1,26 @@
# Python caches, virtual environments, bytecode, and build output
**/.pytest_cache
**/__pycache__/
**/.mypy_cache/
**/.ruff_cache
.venv/
venv/
**/*.pyc
dist/

# Local environment files (may hold settings or credentials; keep them out of the build context)
**/.env

# Project files: local scratch directory
tmp/

# VIM swap files
**/*.swp

# Git metadata, editor history, and IDE settings
.git
.history
.vscode

# macOS Finder metadata
**/.DS_Store
76 changes: 0 additions & 76 deletions Dockerfile

This file was deleted.

24 changes: 7 additions & 17 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,12 @@ VxIngest is containerized for deployment. If you are developing the application,
You will first need to build the Docker image with the following:

```bash
$ docker build \
docker build \
--build-arg BUILDVER=dev \
--build-arg COMMITBRANCH=$(git branch --show-current) \
--build-arg COMMITSHA=$(git rev-parse HEAD) \
--target=prod \
-t vxingest/development/:dev \
-f ./docker/ingest/Dockerfile \
-t vxingest:dev \
.
```

Expand All @@ -49,26 +49,16 @@ cb_collection: "METAR"
Once that's in place, you can run the ingest with Docker Compose like the example below. Note the `public` and `data` env variables respectively point to where the input data resides and where you'd like the container to write out to. They are the only part of the command you would need to modify.

```bash
$ data=/data-ingest/data \
data=/data-ingest/data \
public=/public \
docker compose run ingest python -m ingest \
-c /run/secrets/CREDENTIALS_FILE \
-o /opt/data/test/outdir \
-l /opt/data/test/logs \
-m /opt/data/test/metrics \
-x /opt/data/test/xfer"
docker compose run ingest
```

You can run the "import" via Docker Compose like this example. You will need to use the same value for `data` as you used for the "ingest".

```bash
$ data=/data-ingest/data \
docker compose run import python -m ingest \
-c /run/secrets/CREDENTIALS_FILE \
-o /opt/data/test/outdir \
-l /opt/data/test/logs \
-m /opt/data/test/metrics \
-x /opt/data/test/xfer"
data=/data-ingest/data \
docker compose run import
```

## Diagrams
Expand Down
51 changes: 29 additions & 22 deletions compose.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,10 @@ secrets:

services:
shell:
image : docker.io/vxingest/development:dev
build:
context: .
dockerfile: docker/ingest/Dockerfile
target: prod
volumes:
- ${data}:/opt/data
- ${public}:/public
Expand All @@ -14,41 +17,45 @@ services:
environment:
- CREDENTIALS=/run/secrets/CREDENTIALS_FILE
# The prod image sets ENTRYPOINT to `python -m vxingest.main`, so a `command`
# would only be appended to that as an argument. Override the entrypoint so
# `docker compose run shell` actually starts an interactive shell.
entrypoint: /bin/bash
user: amb-verif
unit_test:
image : docker.io/vxingest/development:dev
test:
build:
context: .
dockerfile: docker/ingest/Dockerfile
target: dev
volumes:
- ${data}:/opt/data
secrets:
- CREDENTIALS_FILE
environment:
- CREDENTIALS=/run/secrets/CREDENTIALS_FILE
command: ./scripts/VXingest_utilities/run_unit_tests.sh
user: amb-verif
int_test:
image : docker.io/vxingest/development:dev
volumes:
- ${data}:/opt/data
secrets:
- CREDENTIALS_FILE
environment:
- CREDENTIALS=/run/secrets/CREDENTIALS_FILE
command: ./scripts/VXingest_utilities/run_int_tests.sh
user: amb-verif
command: poetry run pytest tests
# FIXME - add a way to specify unit & integration tests
ingest:
image : docker.io/vxingest/development:dev
build:
context: .
dockerfile: docker/ingest/Dockerfile
target: prod
volumes:
- ${data}:/opt/data
- ${public}:/public
secrets:
- CREDENTIALS_FILE
command: ./scripts/VXingest_utilities/run-ingest.sh
user: amb-verif
command:
- "--credentials_file=/run/secrets/CREDENTIALS_FILE"
- "--output_dir=/opt/data/test/outdir"
- "--log_dir=/opt/data/test/logs"
- "--metrics_dir=/opt/data/test/metrics"
- "--transfer_dir=/opt/data/test/xfer"
import:
image : docker.io/vxingest/development:dev
build:
context: .
dockerfile: docker/import/Dockerfile
volumes:
- ${data}:/opt/data
secrets:
- CREDENTIALS_FILE
command: ./scripts/VXingest_utilities/run-import.sh
user: amb-verif
# Arguments appended to the image ENTRYPOINT (run-import.sh).
# Each list item is passed as exactly one argument, so every flag and its
# value are separate items; "-c /path" as a single item would hand the script
# an option value that starts with a space.
command:
- "-c"
- "/run/secrets/CREDENTIALS_FILE"
- "-l"
- "/opt/data/xfer"
- "-t"
- "/opt/data/temp_tar"
- "-m"
- "/opt/data/common/job_metrics"
42 changes: 42 additions & 0 deletions docker/import/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
# syntax = docker/dockerfile:1.2

# The import image, used for running just the import script
FROM debian:bookworm-slim AS import

# Build metadata; override with --build-arg at build time
ARG COMMITBRANCH=development
ARG COMMITSHA=unspecified
ARG BUILDVER=dev

# Expose the build metadata to the running container...
ENV BRANCH=${COMMITBRANCH}
ENV COMMIT=${COMMITSHA}
ENV VERSION=${BUILDVER}

# ...and record it on the image itself
LABEL version=${BUILDVER} code.branch=${COMMITBRANCH} code.commit=${COMMITSHA}

# Run OS updates
RUN apt-get update && apt-get upgrade -y && \
    # Install runtime deps for the script
    apt-get install -y curl jq && \
    apt-get clean && rm -rf /var/lib/apt/lists/*

# Specify the UID/GID we want our user to have.
# In this case, use the same uid/gid as the local amb-verif user.
ENV ID=5002

# Add a user with a known uid/gid
# Create a home dir so we have a place for temporary cache dirs & etc...
RUN groupadd --gid ${ID} amb-verif && \
    useradd --shell /bin/bash --create-home --uid ${ID} --gid ${ID} amb-verif

WORKDIR /app

# Copy the scripts and metadata dirs so the import script can run.
# COPY places the *contents* of a source directory into the destination, so the
# destination must name the directory itself. Otherwise ./scripts/ is flattened
# into /app and the ENTRYPOINT path below does not exist.
COPY ./scripts/ /app/scripts/
COPY ./mats_metadata_and_indexes/ /app/mats_metadata_and_indexes/

# TODO - install the cbtools directly and remove from the git repo
# See: https://docs.couchbase.com/cloud/reference/command-line-tools.html#download-and-install-the-couchbase-command-line-tools

USER amb-verif

# Resolved relative to WORKDIR (/app); arguments come from `docker run` / the compose `command`
ENTRYPOINT ["bash", "./scripts/VXingest_utilities/run-import.sh"]
118 changes: 118 additions & 0 deletions docker/ingest/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
# syntax = docker/dockerfile:1.2

# NOTE - avoid installing python packages via apt - they install an alternate version of Python

# The eccodes image, used for building the eccodes library against the correct Python version
FROM python:3.11-slim-bookworm AS eccodes

# eccodes release to download and compile; override with --build-arg ECCODES_VER=<version>
ARG ECCODES_VER=2.32.1

# Install the build toolchain, download the source tarball, and build + install
# eccodes under /eccodes so later stages can copy just that directory.
RUN apt-get update && \
    apt-get install -y curl wget && \
    apt-get install -y build-essential libssl-dev libnetcdff-dev libopenjp2-7-dev gfortran make unzip git cmake && \
    mkdir /build && cd /build && \
    wget https://confluence.ecmwf.int/download/attachments/45757960/eccodes-${ECCODES_VER}-Source.tar.gz && \
    tar -xzf eccodes-${ECCODES_VER}-Source.tar.gz && \
    mkdir eccodes-build && cd eccodes-build && \
    mkdir /eccodes && \
    # Default install location is /usr/local/{bin,include,lib}, use the CMAKE prefix to change to /eccodes
    # Note you'll need to set some bashrc variables (ECCODES_DEFINITION_PATH & ECCODES_DIR) in subsequent images
    # cmake -DCMAKE_INSTALL_PREFIX=/eccodes -DBUILD_SHARED_LIBS=ON -DENABLE_JPG=ON ../eccodes-${ECCODES_VER}-Source && \
    cmake -DCMAKE_INSTALL_PREFIX=/eccodes ../eccodes-${ECCODES_VER}-Source && \
    # Use nproc to get the number of available cores - use all but 1 core for compilation,
    # but never fewer than 1: on a single-core builder "nproc - 1" is 0 and make rejects -j0.
    CORES=$(nproc) && \
    make -j"$(( CORES > 1 ? CORES - 1 : 1 ))" && \
    make install


# The builder image, used for building the virtual environment
FROM python:3.11-slim-bookworm AS builder

# Bring in the eccodes install tree produced by the eccodes stage
COPY --from=eccodes /eccodes /usr/local

# Make sure eccodes knows where to find its install prefix and its definitions
ENV ECCODES_DIR=/usr/local/
ENV ECCODES_DEFINITION_PATH=/usr/local/share/eccodes/definitions/

# libopenjp2-7 and libaec0 are runtime deps for the native eccodes library;
# build-essential is needed to build cftime
RUN apt-get update && \
    apt-get install -y libopenjp2-7 libaec0 build-essential && \
    apt-get clean && rm -rf /var/lib/apt/lists/*

# Refresh the packaging tools first, then install Poetry
RUN pip install --no-cache-dir --upgrade pip setuptools wheel && \
    pip install --no-cache-dir poetry

# Poetry settings: no prompts, create the virtualenv inside the project
# directory (/app/.venv), and keep the cache somewhere that is easy to delete
ENV POETRY_NO_INTERACTION=1
ENV POETRY_VIRTUALENVS_IN_PROJECT=1
ENV POETRY_VIRTUALENVS_CREATE=1
ENV POETRY_CACHE_DIR=/tmp/poetry_cache

WORKDIR /app

# Only the dependency manifests are copied here, so this layer installs just
# the runtime dependencies - no dev tooling and not the project itself
COPY pyproject.toml poetry.lock ./
RUN poetry install --without dev --no-root && rm -rf $POETRY_CACHE_DIR


# The dev image, used for testing
# Extends the builder stage, so Poetry and the runtime dependencies are already present.
FROM builder AS dev

# Use like the following (run from the repository root, which is the build context):
# docker build --target=dev -f docker/ingest/Dockerfile -t vxingest:dev .
# docker run --rm --mount type=bind,src=$(pwd)/tmp/test-data/opt/data,dst=/opt/data -it vxingest:dev bash
# poetry run pytest tests

# Install the app and dev dependencies so we can run tests & tooling
# Copies the whole build context, minus anything excluded by .dockerignore
COPY . /app
RUN poetry install


# The runtime image, used for running just the application with its dependencies
FROM python:3.11-slim-bookworm AS prod

# Build metadata; override with --build-arg at build time
ARG BUILDVER=dev
ARG COMMITBRANCH=development
ARG COMMITSHA=unspecified

# Surface the build metadata inside the container and on the image labels
ENV BRANCH=${COMMITBRANCH} \
    COMMIT=${COMMITSHA} \
    VERSION=${BUILDVER}

LABEL version=${BUILDVER} \
      code.branch=${COMMITBRANCH} \
      code.commit=${COMMITSHA}

# Activate the virtual environment by putting its bin directory first on PATH
ENV VIRTUAL_ENV=/app/.venv
ENV PATH="${VIRTUAL_ENV}/bin:${PATH}"

# Copy in dependencies: the virtualenv from the builder stage and the eccodes
# install tree from the eccodes stage
COPY --from=builder ${VIRTUAL_ENV} ${VIRTUAL_ENV}
COPY --from=eccodes /eccodes /usr/local

# Make sure eccodes knows where to find its install prefix and its definitions
ENV ECCODES_DIR=/usr/local/ \
    ECCODES_DEFINITION_PATH=/usr/local/share/eccodes/definitions/

WORKDIR /app

# UID/GID for the unprivileged user.
# In this case, use the same uid/gid as the local amb-verif user.
ENV ID=5002

# Add a user with a known uid/gid, with a home dir so there is a place for
# temporary cache dirs & etc...
RUN groupadd --gid ${ID} python && \
    useradd --shell /bin/bash --create-home --uid ${ID} --gid ${ID} python

# Run OS updates, then install the runtime deps for the native eccodes library
RUN apt-get update && apt-get upgrade -y && \
    apt-get install -y libopenjp2-7 libaec0 && \
    apt-get clean && rm -rf /var/lib/apt/lists/*

# Copy just the vxingest app (the contents of ./src land directly in /app)
COPY ./src/ /app/

# Drop privileges before the application starts
USER python

ENTRYPOINT ["python", "-m", "vxingest.main"]
Loading

0 comments on commit c2d19e4

Please sign in to comment.