-
Notifications
You must be signed in to change notification settings - Fork 6
/
Dockerfile
87 lines (73 loc) · 4.07 KB
/
Dockerfile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
# Approximately 15 min to build
FROM nvidia/cuda:11.5.0-cudnn8-runtime-ubuntu20.04

# Password given to root at build time. Password logins over SSH are switched
# off below, so remote access is by public key only.
# NOTE(review): build args are visible in `docker history`; never pass a real
# secret through this argument.
ARG SSH_PASSWORD=password

# SSH server setup, based on
# https://docs.docker.com/engine/examples/running_ssh_service/
#   * PermitRootLogin yes + PasswordAuthentication no: root may log in, keys only.
#   * pam_loginuid made optional: SSH login fix, otherwise the user is kicked
#     off right after login.
#   * /root/.ssh/authorized_keys is created empty; the start script appends
#     $SSH_PUBLIC_KEY to it when the container starts.
# DEBIAN_FRONTEND is set inline so apt never waits for interactive input and
# the setting does not leak into the runtime environment.
# The apt lists are intentionally left in place: the next install step in this
# file relies on them.
RUN apt-get update && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y openssh-server && \
    mkdir -p /var/run/sshd && \
    echo "root:$SSH_PASSWORD" | chpasswd && \
    sed -i 's/#PermitRootLogin prohibit-password/PermitRootLogin yes/' /etc/ssh/sshd_config && \
    sed -i 's@session\s*required\s*pam_loginuid.so@session optional pam_loginuid.so@g' /etc/pam.d/sshd && \
    sed -i 's/#PasswordAuthentication yes/PasswordAuthentication no/' /etc/ssh/sshd_config && \
    echo "export VISIBLE=now" >> /etc/profile && \
    mkdir -p /root/.ssh && \
    chmod 700 /root/.ssh && \
    touch /root/.ssh/authorized_keys && \
    chmod 644 /root/.ssh/authorized_keys
ENV NOTVISIBLE="in users profile"
# Miniconda install prefix, with its bin directory placed first on PATH.
# Two separate instructions on purpose: variable substitution inside a single
# ENV uses the values from before that instruction, so PATH could not see a
# CONDA_DIR defined on the same line.
ENV CONDA_DIR=/opt/conda
ENV PATH=${CONDA_DIR}/bin:${PATH}
# Install essentials + awscli + DVC
#   * `apt-get update` runs in the same layer as the install, so the package
#     index can never be a stale cached one.
#   * DEBIAN_FRONTEND=noninteractive keeps packages from prompting
#     during the build.
#   * unzip is listed explicitly because the AWS CLI archive is unpacked with it.
#   * The AWS CLI installer is unpacked under /tmp and removed in this layer.
#   * DVC comes from its own apt repository, hence the second `apt-get update`;
#     `-y` keeps that install from asking for confirmation.
#   * apt lists are dropped at the end to keep the layer small.
RUN apt-get update && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y \
        cmake \
        g++ \
        git \
        graphviz \
        htop \
        libgl1-mesa-glx \
        libgtk2.0-dev \
        libhdf5-dev \
        liblapack-dev \
        libopenblas-dev \
        libx11-dev \
        nano \
        openmpi-bin \
        unzip \
        vim \
        wget \
        zip && \
    wget "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -O /tmp/awscliv2.zip && \
    unzip -q /tmp/awscliv2.zip -d /tmp && \
    /tmp/aws/install && \
    rm -rf /tmp/aws /tmp/awscliv2.zip && \
    wget https://dvc.org/deb/dvc.list -O /etc/apt/sources.list.d/dvc.list && \
    apt-get update && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y dvc && \
    rm -rf /var/lib/apt/lists/*
# Install Miniconda into $CONDA_DIR.
# The installer is saved under an explicit /tmp path so the step does not
# depend on the current working directory, and it is deleted in the same layer.
# -b runs the installer in batch mode, -f lets it install into the prefix that
# already exists, -p selects the prefix.
# No `apt-get update` here: nothing is installed through apt in this step.
RUN mkdir -p $CONDA_DIR && \
    wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O /tmp/miniconda.sh && \
    /bin/bash /tmp/miniconda.sh -f -b -p $CONDA_DIR && \
    rm /tmp/miniconda.sh
# Only the environment spec is copied at this point, so changes to the rest of
# the sources do not invalidate the dependency layer below.
COPY ./environment.yml /torchok/environment.yml
# Install TorchOk dependencies
#   * The very large read timeout is there for slow package downloads.
#   * `-y` makes the conda self-update explicitly non-interactive.
#   * The environment file is referenced by absolute path, independent of the
#     working directory.
#   * `conda clean -ay` removes index cache, tarballs and unused packages in
#     the same layer that created them.
#   * Interactive root shells activate the environment and start in /.
#     NOTE(review): assumes environment.yml names the environment "torchok" -- confirm.
RUN conda config --set remote_read_timeout_secs 100000.0 && \
    conda init && \
    conda update -y -n base -c defaults conda && \
    conda env create -f /torchok/environment.yml && \
    conda clean -ay && \
    echo "conda activate torchok" >> /root/.bashrc && \
    echo "cd /" >> /root/.bashrc
# CUDA locations for the dynamic loader (LD_LIBRARY_PATH), the linker
# (LIBRARY_PATH) and tools that look for CUDA_HOME. Each search path is
# prepended to whatever value was already set before this instruction.
ENV LD_LIBRARY_PATH=/usr/local/cuda/lib64:/lib/x86_64-linux-gnu:${LD_LIBRARY_PATH} \
    LIBRARY_PATH=/usr/local/cuda/lib64:/lib/x86_64-linux-gnu:${LIBRARY_PATH} \
    CUDA_HOME=/usr/local/cuda
# AWS CLI configuration. The credentials are empty placeholders in the image;
# real values are expected to be passed at `docker run --env ...` time.
ENV AWS_ACCESS_KEY_ID="" \
    AWS_SECRET_ACCESS_KEY="" \
    AWS_DEFAULT_REGION="eu-west-1" \
    AWS_DEFAULT_OUTPUT="json"
# Public key used to reach the container over SSH from the outside; empty by
# default and supplied at `docker run --env ...` time.
ENV SSH_PUBLIC_KEY=""
# Generate /bin/start.sh, the container start script.
# It exists so that values passed with `docker run --env ...` take effect at
# container start: the SSH public key is appended to authorized_keys, and the
# important environment variables are written to /etc/profile so that SSH login
# shells see them, as described here:
# https://docs.docker.com/engine/examples/running_ssh_service/#environment-variables
#
# Notes on how the script is produced:
#   * printf '%s\n' writes every argument as one literal line, so the result
#     does not depend on how the shell's `echo` treats "\n".
#   * All lines are single-quoted: nothing is expanded at build time, every
#     $VAR is resolved when the container starts.
#   * The key is only appended when SSH_PUBLIC_KEY is non-empty, and is quoted
#     so it is written exactly as given.
#   * In the jupkill/tbkill lines, \\\$ makes the script write \$( into
#     /etc/profile, so pgrep runs when the alias is used, not at login.
#   * `exec` replaces the script's bash process with sshd, so sshd itself
#     receives the signals sent to that process.
# NOTE(review): the script appends to /etc/profile on every container start, so
# restarting the same container repeats these lines.
RUN printf '%s\n' \
        '#!/bin/bash' \
        'if [ -n "$SSH_PUBLIC_KEY" ]; then' \
        '    echo "$SSH_PUBLIC_KEY" >> /root/.ssh/authorized_keys' \
        'fi' \
        'echo "export AWS_ACCESS_KEY_ID=$AWS_ACCESS_KEY_ID" >> /etc/profile' \
        'echo "export AWS_SECRET_ACCESS_KEY=$AWS_SECRET_ACCESS_KEY" >> /etc/profile' \
        'echo "export AWS_DEFAULT_REGION=$AWS_DEFAULT_REGION" >> /etc/profile' \
        'echo "export AWS_DEFAULT_OUTPUT=$AWS_DEFAULT_OUTPUT" >> /etc/profile' \
        'echo "export CONDA_DIR=$CONDA_DIR" >> /etc/profile' \
        'echo "export PATH=$CONDA_DIR/bin:$PATH" >> /etc/profile' \
        'echo "export LD_LIBRARY_PATH=/usr/local/cuda/lib64:/lib/x86_64-linux-gnu:$LD_LIBRARY_PATH" >> /etc/profile' \
        'echo "export LIBRARY_PATH=/usr/local/cuda/lib64:/lib/x86_64-linux-gnu:$LIBRARY_PATH" >> /etc/profile' \
        'echo "export CUDA_HOME=/usr/local/cuda" >> /etc/profile' \
        'echo "alias juplabstart=\"nohup jupyter lab --ip 0.0.0.0 --allow-root > jup.log 2>&1 &\"" >> /etc/profile' \
        'echo "alias jupnotestart=\"nohup jupyter notebook --ip 0.0.0.0 --allow-root > jup.log 2>&1 &\"" >> /etc/profile' \
        'echo "alias jupkill=\"kill -9 \\\$(pgrep -f jupyter)\"" >> /etc/profile' \
        'echo "alias tbkill=\"kill -9 \\\$(pgrep -f tensorboard)\"" >> /etc/profile' \
        'exec /usr/sbin/sshd -D' \
        > /bin/start.sh && \
    chmod +x /bin/start.sh
# Generate /bin/tbstart.sh: starts TensorBoard in the background on all
# interfaces for the log directory given as the first argument, sending its
# output to tb.log in the current directory.
# printf '%s\n' writes each argument as one literal line (no reliance on the
# shell's `echo` handling of "\n"); "$1" is quoted so a log directory
# containing spaces is passed through as a single argument.
RUN printf '%s\n' \
        '#!/bin/bash' \
        'nohup tensorboard --bind_all --logdir="$1" > tb.log 2>&1 & echo "see tb.log for address"' \
        > /bin/tbstart.sh && \
    chmod +x /bin/tbstart.sh
# Project sources are copied last so that source edits do not invalidate the
# dependency layers above.
COPY . /torchok
# 22 is for sshd; 8888 and 6006 are presumably the Jupyter and TensorBoard
# default ports used by the helper aliases/scripts -- confirm.
EXPOSE 8888 6006 22
# Exec form: the start script is run directly by bash instead of through an
# extra `/bin/sh -c` wrapper process.
ENTRYPOINT ["/bin/bash", "/bin/start.sh"]