forked from horovod/horovod
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Dockerfile.test.gpu
115 lines (95 loc) · 4.33 KB
/
Dockerfile.test.gpu
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
# Tag of the nvidia/cuda base image. Declared ahead of FROM so it can be used in the FROM line.
ARG CUDA_DOCKER_VERSION=10.0-devel-ubuntu16.04
FROM nvidia/cuda:${CUDA_DOCKER_VERSION}
# Arguments for the build. CUDA_DOCKER_VERSION needs to be repeated because
# the first usage only applies to the FROM tag.
ARG CUDA_DOCKER_VERSION=10.0-devel-ubuntu16.04
# Exact apt package version for libcudnn7.
ARG CUDNN_VERSION=7.6.0.64-1+cuda10.0
# Exact apt package version for both libnccl2 and libnccl-dev.
ARG NCCL_VERSION_OVERRIDE=2.4.7-1+cuda10.0
# "OpenMPI" unpacks the openmpi-3.0.0-bin tarball into /usr/local; any other value installs mpich from apt.
ARG MPI_KIND=OpenMPI
# Interpreter version: installed as python${PYTHON_VERSION} and symlinked to /usr/bin/python.
ARG PYTHON_VERSION=2.7
# Arguments handed to `pip install` for each framework (requirement specifiers or wheel URLs).
ARG TENSORFLOW_PACKAGE=tensorflow-gpu==1.14.0
ARG KERAS_PACKAGE=keras==2.2.4
# The special value "torch-nightly" installs torch_nightly from the nightly wheel index instead.
ARG PYTORCH_PACKAGE=https://download.pytorch.org/whl/cu100/torch-1.1.0-cp27-cp27mu-linux_x86_64.whl
ARG TORCHVISION_PACKAGE=https://download.pytorch.org/whl/cu100/torchvision-0.3.0-cp27-cp27mu-linux_x86_64.whl
ARG MXNET_PACKAGE=mxnet-cu100==1.4.1
ARG PYSPARK_PACKAGE=pyspark==2.4.0
# Variable assignments placed in front of the `pip install` command that builds Horovod.
ARG HOROVOD_BUILD_FLAGS=HOROVOD_GPU_ALLREDUCE=NCCL
# Re-exported as an ENV variable of the same name at the end of the build.
ARG HOROVOD_MIXED_INSTALL=0
# Set default shell to /bin/bash (several RUN steps below rely on `[[ ... ]]`).
#   -o pipefail : a pipeline fails when any stage fails, not only the last one,
#                 so a broken upstream command cannot be masked.
#   -u          : expanding an unset variable is an error.
#   -c          : run the RUN instruction's text as the command string.
SHELL ["/bin/bash", "-o", "pipefail", "-cu"]
# Install essential packages.
# `apt-get update` and `apt-get install` run in the same layer so that a cached,
# out-of-date package index can never be combined with a fresh install step
# (the version-pinned libcudnn7/libnccl packages could then fail to resolve).
# The package lists are deliberately NOT removed here: later RUN steps in this
# file install more apt packages and reuse the index fetched by this layer.
# Packages are listed alphabetically, one per line, to keep diffs small.
RUN apt-get update -qq && \
    apt-get install -y --allow-downgrades --allow-change-held-packages --no-install-recommends \
        build-essential \
        ca-certificates \
        g++-4.8 \
        git \
        libcudnn7=${CUDNN_VERSION} \
        libnccl-dev=${NCCL_VERSION_OVERRIDE} \
        libnccl2=${NCCL_VERSION_OVERRIDE} \
        openssh-client \
        wget
# Install Python.
# The interpreter and its headers come from apt, selected by PYTHON_VERSION.
RUN apt-get install -y python${PYTHON_VERSION} python${PYTHON_VERSION}-dev
# Python 3.6 additionally gets the python3.6-distutils package.
RUN if [[ "${PYTHON_VERSION}" == "3.6" ]]; then \
        apt-get install -y python${PYTHON_VERSION}-distutils; \
    fi
# Make the selected interpreter the default `python`.
RUN ln -s -f /usr/bin/python${PYTHON_VERSION} /usr/bin/python
# Bootstrap pip.
# The unversioned get-pip.py only supports currently maintained Python releases
# and aborts on older interpreters (including the default 2.7), so the
# version-specific script is tried first. If none is published for
# PYTHON_VERSION (the download fails), fall back to the unversioned script.
RUN { wget -O get-pip.py https://bootstrap.pypa.io/pip/${PYTHON_VERSION}/get-pip.py || \
      wget -O get-pip.py https://bootstrap.pypa.io/get-pip.py; } && \
    python get-pip.py && \
    rm get-pip.py
# Upgrade/reinstall the packaging and test tooling.
# --force-reinstall is the full spelling of the abbreviated `--force` flag.
RUN pip install -U --force-reinstall pip setuptools requests pytest
# Install PySpark.
# A headless OpenJDK 8 is installed first, then the PySpark package itself.
# `apt-get` is used instead of `apt`: the `apt` front end is meant for
# interactive use and does not promise a stable command-line interface.
RUN apt-get install -y openjdk-8-jdk-headless
RUN pip install ${PYSPARK_PACKAGE}
# Install MPI.
# MPI_KIND=OpenMPI unpacks the openmpi-3.0.0-bin tarball into /usr/local and
# refreshes the linker cache; any other value installs mpich from apt.
# Either way, the mpirun invocation to use with this image is written to
# /mpirun_command.
# The downloaded tarball is deleted in the same RUN so that it does not stay
# in the image layer.
RUN if [[ ${MPI_KIND} == "OpenMPI" ]]; then \
        wget -O /tmp/openmpi-3.0.0-bin.tar.gz https://github.com/horovod/horovod/files/1596799/openmpi-3.0.0-bin.tar.gz && \
        tar -zxf /tmp/openmpi-3.0.0-bin.tar.gz -C /usr/local && \
        rm -f /tmp/openmpi-3.0.0-bin.tar.gz && \
        ldconfig && \
        echo "mpirun -allow-run-as-root -np 2 -H localhost:2 -bind-to none -map-by slot -mca mpi_abort_print_stack 1" > /mpirun_command; \
    else \
        apt-get install -y mpich && \
        echo "mpirun -np 2" > /mpirun_command; \
    fi
# Default NCCL parameters: append NCCL_DEBUG=INFO to the system-wide
# /etc/nccl.conf file.
RUN printf '%s\n' 'NCCL_DEBUG=INFO' >> /etc/nccl.conf
# Install mpi4py into the default Python environment.
RUN pip install mpi4py
### END OF CACHE ###
# Copy the whole build context into /horovod. Because this layer depends on the
# content of the build context, any change to the copied files invalidates the
# build cache for this instruction and for every instruction after it.
COPY . /horovod
# Install TensorFlow.
# The package spec comes from the TENSORFLOW_PACKAGE build argument.
RUN pip install ${TENSORFLOW_PACKAGE}
# Install Keras.
# h5py, scipy and pandas are installed together with the Keras package.
RUN pip install ${KERAS_PACKAGE} h5py scipy pandas
# Create the per-user Keras directory ahead of the first Keras import.
RUN mkdir -p ~/.keras
# Call keras.datasets.mnist.load_data() once at build time
# (presumably to cache the MNIST dataset inside the image -- TODO confirm).
# The CUDA stubs directory is added to the linker cache only for this step;
# the trailing plain `ldconfig` rebuilds the cache without it.
# NOTE(review): the stubs look like they are needed so the GPU build of
# TensorFlow can be imported on a build host without an NVIDIA driver -- confirm.
RUN ldconfig /usr/local/cuda/targets/x86_64-linux/lib/stubs && \
python -c "from keras.datasets import mnist; mnist.load_data()" && \
ldconfig
# Install PyTorch.
# `future` and `typing` go in first, ahead of the torch package itself.
RUN pip install future typing
# PYTORCH_PACKAGE="torch-nightly" installs torch_nightly from the nightly wheel
# index for the CUDA release encoded in CUDA_DOCKER_VERSION: everything from
# the first "-" onward is dropped and the first "." is removed
# (e.g. 10.0-devel-ubuntu16.04 -> 100 -> index "cu100").
# Any other value is handed to pip unchanged.
RUN case "${PYTORCH_PACKAGE}" in \
        "torch-nightly") \
            cuda_release="${CUDA_DOCKER_VERSION%%-*}"; \
            PYTORCH_CUDA="${cuda_release/./}"; \
            pip install torch_nightly -v -f https://download.pytorch.org/whl/nightly/cu${PYTORCH_CUDA}/torch_nightly.html ;; \
        *) \
            pip install ${PYTORCH_PACKAGE} ;; \
    esac
# torchvision and Pillow are installed with --no-deps, so pip does not pull in
# (or replace) any of their dependencies.
RUN pip install ${TORCHVISION_PACKAGE} Pillow --no-deps
# Install MXNet.
# The package spec comes from the MXNET_PACKAGE build argument.
RUN pip install ${MXNET_PACKAGE}
# Install Horovod.
# First build a source distribution from the copied checkout in /horovod ...
RUN cd /horovod && python setup.py sdist
# ... then install that sdist with pip.
# HOROVOD_BUILD_FLAGS is expanded into the command string given to the nested
# `bash -c`, where its NAME=value words are parsed as environment assignments
# for the pip command that follows them.
# The CUDA stubs directory is added to the linker cache only while pip runs;
# the trailing plain `ldconfig` rebuilds the cache without it.
RUN ldconfig /usr/local/cuda/targets/x86_64-linux/lib/stubs && \
bash -c "${HOROVOD_BUILD_FLAGS} pip install -v /horovod/dist/horovod-*.tar.gz" && \
ldconfig
# TensorFlow 1.1.0 compatibility hack for the MNIST example: when exactly that
# package spec is being tested, import keras from tensorflow.contrib instead.
RUN case "${TENSORFLOW_PACKAGE}" in \
        "tensorflow-gpu==1.1.0") \
            sed -i -e 's|from tensorflow import keras|from tensorflow.contrib import keras|' /horovod/examples/tensorflow_mnist.py ;; \
    esac
# Shrink the TensorFlow MNIST example: last_step 20000 -> 100.
RUN sed -i -e 's|last_step=20000|last_step=100|' /horovod/examples/tensorflow_mnist.py
# Shrink the TensorFlow Eager MNIST example: dataset.take(20000...) -> dataset.take(100...).
RUN sed -i -e 's|dataset.take(20000|dataset.take(100|' /horovod/examples/tensorflow_mnist_eager.py
# Shrink the Keras MNIST advanced example: whatever follows "epochs = " becomes 9.
RUN sed -i -e 's|epochs = .*|epochs = 9|' /horovod/examples/keras_mnist_advanced.py
# Shrink the PyTorch MNIST example: default for --epochs 10 -> 2.
RUN sed -i -e "s|'--epochs', type=int, default=10,|'--epochs', type=int, default=2,|" /horovod/examples/pytorch_mnist.py
# Export HOROVOD_MIXED_INSTALL
# ARG values exist only during the build; copying the value into ENV makes it
# part of the environment of containers started from this image.
ENV HOROVOD_MIXED_INSTALL=${HOROVOD_MIXED_INSTALL}