From 301de4ab3f93eeafe3149652be6f36a554163b88 Mon Sep 17 00:00:00 2001 From: runame Date: Mon, 18 Sep 2023 16:15:54 +0000 Subject: [PATCH 1/3] Switch to absolute paths in Dockerfile --- docker/Dockerfile | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index ab6a798c1..dceee80ca 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -34,24 +34,24 @@ RUN echo "Setting up algorithmic_efficiency repo" ARG branch="main" ARG framework="both" ARG git_url=https://github.com/mlcommons/algorithmic-efficiency.git -RUN git clone $git_url && cd algorithmic-efficiency -RUN cd algorithmic-efficiency && git checkout $branch +RUN git clone $git_url && cd /algorithmic-efficiency +RUN cd /algorithmic-efficiency && git checkout $branch -RUN cd algorithmic-efficiency && pip install -e '.[full]' +RUN cd /algorithmic-efficiency && pip install -e '.[full]' RUN if [ "$framework" = "jax" ] ; then \ echo "Installing Jax GPU" \ - && cd algorithmic-efficiency \ + && cd /algorithmic-efficiency \ && pip install -e '.[jax_gpu]' -f 'https://storage.googleapis.com/jax-releases/jax_cuda_releases.html' \ && pip install -e '.[pytorch_cpu]' -f 'https://download.pytorch.org/whl/torch_stable.html'; \ elif [ "$framework" = "pytorch" ] ; then \ echo "Installing Pytorch GPU" \ - && cd algorithmic-efficiency \ + && cd /algorithmic-efficiency \ && pip install -e '.[jax_cpu]' \ && pip install -e '.[pytorch_gpu]' -f 'https://download.pytorch.org/whl/torch_stable.html'; \ elif [ "$framework" = "both" ] ; then \ echo "Installing Jax GPU and Pytorch GPU" \ - && cd algorithmic-efficiency \ + && cd /algorithmic-efficiency \ && pip install -e '.[jax_gpu]' -f 'https://storage.googleapis.com/jax-releases/jax_cuda_releases.html' \ && pip install -e '.[pytorch_gpu]' -f 'https://download.pytorch.org/whl/torch_stable.html'; \ else \ @@ -59,13 +59,13 @@ RUN if [ "$framework" = "jax" ] ; then \ && exit 1 ; \ fi -RUN cd algorithmic-efficiency && 
pip install -e '.[wandb]' +RUN cd /algorithmic-efficiency && pip install -e '.[wandb]' -RUN cd algorithmic-efficiency && git fetch origin -RUN cd algorithmic-efficiency && git pull +RUN cd /algorithmic-efficiency && git fetch origin +RUN cd /algorithmic-efficiency && git pull # Todo: remove this, this is temporary for developing COPY scripts/startup.sh /algorithmic-efficiency/docker/scripts/startup.sh RUN chmod a+x /algorithmic-efficiency/docker/scripts/startup.sh -ENTRYPOINT ["bash", "algorithmic-efficiency/docker/scripts/startup.sh"] +ENTRYPOINT ["bash", "/algorithmic-efficiency/docker/scripts/startup.sh"] From cc6d0dbdf86c1f3a0c6273f14ec26608f21ad025 Mon Sep 17 00:00:00 2001 From: runame Date: Mon, 18 Sep 2023 16:23:59 +0000 Subject: [PATCH 2/3] Only set DEBIAN_FRONTEND where necessary --- docker/Dockerfile | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index dceee80ca..bc3b51649 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -6,13 +6,12 @@ # To build Docker image FROM nvidia/cuda:11.8.0-cudnn8-devel-ubuntu20.04 -ARG DEBIAN_FRONTEND=noninteractive # Installing machine packages RUN echo "Setting up machine" RUN apt-get update RUN apt-get install -y curl tar -RUN apt-get install -y git python3 pip wget ffmpeg +RUN DEBIAN_FRONTEND=noninteractive apt-get install -y git python3 pip wget ffmpeg RUN apt-get install libtcmalloc-minimal4 RUN apt-get install unzip RUN apt-get install pigz From e7b854c3116cfa58fd81f7f33d8891cd7bd9cdfa Mon Sep 17 00:00:00 2001 From: runame Date: Mon, 18 Sep 2023 19:44:20 +0200 Subject: [PATCH 3/3] Add instructions for running Singularity/Apptainer container to README --- README.md | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index a4536c35e..1be096c2e 100644 --- a/README.md +++ b/README.md @@ -33,7 +33,7 @@ ## Installation -You can install this package and dependences in a [python virtual 
environment](#virtual-environment) or use a [Docker container](#install-in-docker) (recommended). +You can install this package and dependencies in a [python virtual environment](#virtual-environment) or use a [Docker/Singularity/Apptainer container](#install-in-docker) (recommended). *TL;DR to install the Jax version for GPU run:* @@ -89,7 +89,8 @@ pip3 install -e '.[full]' ## Docker -We recommend using a Docker container to ensure a similar environment to our scoring and testing environments. +We recommend using a Docker container to ensure a similar environment to our scoring and testing environments. +Alternatively, a Singularity/Apptainer container can also be used (see instructions below). **Prerequisites for NVIDIA GPU set up**: You may have to install the NVIDIA Container Toolkit so that the containers can locate the NVIDIA drivers and GPUs. @@ -133,6 +134,25 @@ To use the Docker container as an interactive virtual environment, you can run a ### Running Docker Container (End-to-end) To run a submission end-to-end in a containerized environment see [Getting Started Document](./getting_started.md#run-your-submission-in-a-docker-container). +### Using Singularity/Apptainer instead of Docker +Since many compute clusters don't allow the usage of Docker due to security concerns and instead encourage the use of [Singularity/Apptainer](https://github.com/apptainer/apptainer) (formerly Singularity, now called Apptainer), we also provide instructions on how to build an Apptainer container based on the Dockerfile provided here. 
+ +To convert the Dockerfile into an Apptainer definition file, we will use [spython](https://github.com/singularityhub/singularity-cli): +```bash +pip3 install spython +cd algorithmic-efficiency/docker +spython recipe Dockerfile &> Singularity.def +``` +Now we can build the Apptainer image by running +```bash +singularity build --fakeroot <singularity_image_name>.sif Singularity.def +``` +To start a shell session with GPU support (by using the `--nv` flag), we can run +```bash +singularity shell --nv <singularity_image_name>.sif +``` +Similarly to Docker, Apptainer allows you to bind specific paths between the host system and the container by specifying the `--bind` flag, as explained [here](https://docs.sylabs.io/guides/3.7/user-guide/bind_paths_and_mounts.html). + # Getting Started For instructions on developing and scoring your own algorithm in the benchmark see [Getting Started Document](./getting_started.md). ## Running a workload