Skip to content

Commit

Permalink
Add CUDA + PyTorch (#31)
Browse files Browse the repository at this point in the history
* Add CUDA + PyTorch

Assumes CUDA 12.0 is installed. Might work with other versions.

Note that installing PyTorch is by far the longest
part of this process. Starting from an image with PyTorch
pre-installed would make the build much faster. However, copying
torch out of another image would be pretty hard, so it might be
worth copying/installing ROS on top of a pre-built torch container.

* Build with/without GPU via arguments

Allows disabling GPU support (--no-gpu), or forcing a GPU
build (--force-gpu) on platforms without a GPU.

The development environment defaults to no GPU if the
Nvidia Container Toolkit is not present. Production
prints an error and exits in that case unless --no-gpu
(or --force-gpu) is explicitly specified.

* Lock down all CUDA requirement versions
  • Loading branch information
NoRePercussions authored Nov 19, 2023
1 parent 7e10c4a commit 7198b96
Show file tree
Hide file tree
Showing 6 changed files with 187 additions and 10 deletions.
11 changes: 10 additions & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
# Helper stage: used only as the source of the CUDA files copied below.
FROM nvidia/cuda:11.6.2-base-ubuntu20.04 AS cuda

# Final image is based on the ROS Noetic desktop-full (focal) image.
FROM osrf/ros:noetic-desktop-full-focal

# COPY with a directory source copies the directory's *contents*, not the
# directory itself. The destination therefore has to name the cuda directory;
# copying into /usr/local/ would scatter the toolkit's subdirectories
# directly under /usr/local.
COPY --from=cuda /usr/local/cuda /usr/local/cuda


RUN apt update
RUN apt-get install -y -qq \
Expand All @@ -14,8 +18,14 @@ RUN apt-get install -y -qq \
ros-noetic-realsense2-camera \
ros-noetic-realsense2-description

# Install the CUDA/PyTorch requirements in their own layer so Docker can cache
# them separately from the other Python requirements.
# --no-cache-dir keeps pip's download cache out of the image layer.
COPY cuda-requirements.txt cuda-requirements.txt
RUN pip3 install --no-cache-dir -r cuda-requirements.txt


# Remaining Python requirements, installed in a separate layer.
COPY python-requirements.txt python-requirements.txt
RUN pip3 install --no-cache-dir -r python-requirements.txt

# Append the shell start-up steps to ~/.bashrc: source the ROS environment,
# change into rb_ws, and run catkin_make with its stdout discarded.
RUN printf '%s\n' \
      'source "/opt/ros/$ROS_DISTRO/setup.bash" --' \
      'cd rb_ws' \
      'catkin_make >/dev/null' \
    >> ~/.bashrc
Expand All @@ -30,4 +40,3 @@ RUN pip3 install numba

# Enable mouse support in tmux by appending the option to ~/.tmux.conf
RUN echo 'set -g mouse on' >> ~/.tmux.conf

7 changes: 7 additions & 0 deletions cuda-requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# `~=X.Y.0` is a compatible-release pin: it accepts X.Y.* patch releases
# at or above X.Y.0, but not the next minor version.
torch~=2.1.0
torchvision~=0.16.0
torchaudio~=2.1.0

# Note: using TensorFlow requires AVX instructions,
# which would mean we couldn't run simulations
# in almost any virtualized container.
38 changes: 38 additions & 0 deletions docker-compose-gpu.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# Compose definition for the GPU variant of the stack.
services:
  # Main workspace container, built from the Dockerfile in this directory.
  main:
    build: .
    platform: "linux/amd64"
    stdin_open: true # docker run -i
    tty: true # docker run -t
    environment:
      - DISPLAY=host.docker.internal:0
    # Each *_PORT variable falls back to /dev/null when it is unset or empty.
    volumes:
      - ./rb_ws:/rb_ws
      - "${RLSENSE_PORT:-/dev/null}:/dev/bus/usb"
      - /tmp/.X11-unix:/tmp/.X11-unix
    devices:
      - "${TEENSY_PORT:-/dev/null}:/dev/ttyUSB0"
      - "${WEBCAM_PORT:-/dev/null}:/dev/ttyUSB1"
      - "${GPS_PORT:-/dev/null}:/dev/ttyACM0"
      - "${FEATHER_PORT:-/dev/null}:/dev/ttyACM1"
    device_cgroup_rules:
      - "c *:* rmw"
    ports:
      - "0.0.0.0:8765:8765" # foxglove bridge
      - "0.0.0.0:8760:8760" # Asset server for loading stuff into foxglove
    # Reserve every available Nvidia GPU for this service.
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 'all'
              capabilities:
                - gpu

  # Map tile server; serves ./maps using /data/conf.json.
  tileserver:
    image: maptiler/tileserver-gl
    stdin_open: true # docker run -i
    tty: true # docker run -t
    command: ["-p", "80", "-c", "/data/conf.json"]
    volumes:
      - "./maps:/data"
    ports:
      - "8080:80"
File renamed without changes.
71 changes: 67 additions & 4 deletions setup_dev.sh
Original file line number Diff line number Diff line change
@@ -1,8 +1,71 @@
#!/bin/bash
# Run to spin up docker containers and set aliases
docker compose down # kill all running containers
docker compose build
docker compose --env-file .env.dev up -d

# GPU mode flags. Each holds the literal word `true` or `false`; both start
# off and are switched on by the matching command-line option.
force_gpu=false
no_gpu=false

#######################################
# Print the command-line help text.
# Arguments: none ($0 is used as the program name).
# Outputs:   usage text on stdout, one option per line.
#######################################
usage() {
  # -h/--help is accepted by the option parser, so it is listed here too.
  printf '%s\n' \
    "Usage: $0 [options]" \
    "Options:" \
    " -h, --help Show this help message and exit." \
    " --no-gpu Disable CUDA support. Required to run on systems without Nvidia Container Toolkit." \
    " --force-gpu Force a GPU build even if Nvidia Container Toolkit is not detected"
}

# Consume every command-line argument, recording which GPU flag was given.
# A help flag prints the usage text and exits 0; anything unrecognised is
# reported on stderr, followed by the usage text, and exits 1.
while (( $# > 0 )); do
  case "$1" in
    --no-gpu)
      no_gpu=true
      ;;
    --force-gpu)
      force_gpu=true
      ;;
    -h|--help)
      usage
      exit 0
      ;;
    *)
      echo "Invalid option: $1" >&2
      usage
      exit 1
      ;;
  esac
  shift
done

# Reject contradictory flags: a build cannot both force and disable the GPU.
# The flags are compared as strings rather than executed as commands, and all
# diagnostics go to stderr.
if [[ "$no_gpu" == true && "$force_gpu" == true ]]; then
  echo -e "\033[0;31mOptions --no-gpu and --force-gpu conflict. Use at most one of them.\033[0m" >&2
  exit 1
fi

# With neither flag given, probe for the Nvidia Container Toolkit CLI. In
# development a missing toolkit is not fatal: warn and fall back to no GPU.
if [[ "$no_gpu" != true && "$force_gpu" != true ]] && ! command -v nvidia-ctk &> /dev/null; then
  {
    echo -e "\033[0;31mNvidia Container Toolkit was not found.\033[0m"
    echo -e "\033[0;31mRun with --force-gpu to build with GPU.\033[0m"
    echo -e "\033[0;31mRun with --no-gpu to silence this message.\033[0m"
    echo -e "\033[0;31mContinuing with no GPU...\033[0m"
  } >&2
  no_gpu=true
fi

#####################
# Actual logic here #
#####################

# Choose the compose file: start from the GPU variant and switch to the
# no-GPU variant when the no_gpu flag command succeeds.
dockerfile="docker-compose-gpu.yml"
if $no_gpu; then
  dockerfile="docker-compose-no-gpu.yml"
fi

echo "Killing old development containers..."
# Either variant may be up from a previous run, so bring both down.
docker compose -f docker-compose-no-gpu.yml down # kill old containers
docker compose -f docker-compose-gpu.yml down # kill old containers

echo "Building containers..."
# Stop if the build fails rather than trying to start containers anyway.
docker compose -f "$dockerfile" build || exit 1

echo "Starting containers..."
docker compose -f "$dockerfile" --env-file .env.dev up -d || exit 1

sleep 0.5

Expand Down
70 changes: 65 additions & 5 deletions setup_prod.sh
Original file line number Diff line number Diff line change
@@ -1,11 +1,71 @@
#!/bin/bash
# Run to spin up docker containers and set aliases
docker kill $(docker ps -q) # kill all running containers
docker compose build
docker compose --env-file .env.prod up -d

# GPU mode flags. Each holds the literal word `true` or `false`; both start
# off and are switched on by the matching command-line option.
force_gpu=false
no_gpu=false

#######################################
# Print the command-line help text.
# Arguments: none ($0 is used as the program name).
# Outputs:   usage text on stdout, one option per line.
#######################################
usage() {
  # -h/--help is accepted by the option parser, so it is listed here too.
  printf '%s\n' \
    "Usage: $0 [options]" \
    "Options:" \
    " -h, --help Show this help message and exit." \
    " --no-gpu Disable CUDA support. Required to run on systems without Nvidia Container Toolkit." \
    " --force-gpu Force a GPU build even if Nvidia Container Toolkit is not detected"
}

# Walk the argument list one word at a time. GPU flags are recorded, a help
# flag prints the usage text and exits 0, and any other word is reported on
# stderr, followed by the usage text, and exits 1.
until (( $# == 0 )); do
  case "$1" in
    -h|--help)
      usage
      exit 0
      ;;
    --force-gpu)
      force_gpu=true
      ;;
    --no-gpu)
      no_gpu=true
      ;;
    *)
      echo "Invalid option: $1" >&2
      usage
      exit 1
      ;;
  esac
  shift
done

# Reject contradictory flags: a build cannot both force and disable the GPU.
# The flags are compared as strings rather than executed as commands, and all
# diagnostics go to stderr.
if [[ "$no_gpu" == true && "$force_gpu" == true ]]; then
  echo -e "\033[0;31mOptions --no-gpu and --force-gpu conflict. Use at most one of them.\033[0m" >&2
  exit 1
fi

# With neither flag given, probe for the Nvidia Container Toolkit CLI.
if [[ "$no_gpu" != true && "$force_gpu" != true ]] && ! command -v nvidia-ctk &> /dev/null; then
  # In production, we should require it to be specified when we want no GPU.
  {
    echo -e "\033[0;31mNvidia Container Toolkit was not found.\033[0m"
    echo -e "\033[0;31mRun with --no-gpu to build without GPU, or --force-gpu to force a GPU build.\033[0m"
  } >&2
  exit 1
fi

#####################
# Actual logic here #
#####################

# Choose the compose file: start from the GPU variant and switch to the
# no-GPU variant when the no_gpu flag command succeeds.
dockerfile="docker-compose-gpu.yml"
if $no_gpu; then
  dockerfile="docker-compose-no-gpu.yml"
fi

echo "Killing all containers..."
# `docker kill` fails when called with no container IDs, so only invoke it
# when at least one container is running.
running_containers=$(docker ps -q)
if [[ -n "$running_containers" ]]; then
  # Intentionally unquoted: the IDs must split into separate arguments.
  # shellcheck disable=SC2086
  docker kill $running_containers
fi

echo "Building containers..."
# Stop if the build fails rather than trying to start containers anyway.
docker compose -f "$dockerfile" build || exit 1

echo "Starting containers..."
docker compose -f "$dockerfile" --env-file .env.prod up -d || exit 1

sleep 0.5

echo "DEBUG: Buggy Docker Container Up!"
echo "Run docker_exec in order to go into the Docker container"

0 comments on commit 7198b96

Please sign in to comment.