diff --git a/base/Dockerfile b/base/Dockerfile index 9f4693c..3e44a06 100644 --- a/base/Dockerfile +++ b/base/Dockerfile @@ -27,7 +27,8 @@ RUN yum -y install \ munge-libs \ munge-devel \ openssh-server \ - openssh-clients + openssh-clients \ + nfs-utils # install slurm 17.11.5 COPY rpms /packages diff --git a/controller/Dockerfile b/controller/Dockerfile index e31443b..76acc8e 100644 --- a/controller/Dockerfile +++ b/controller/Dockerfile @@ -34,7 +34,10 @@ ENV USE_SLURMDBD=true \ SLURMD_PORT=6818 \ ACCOUNTING_STORAGE_HOST=database \ ACCOUNTING_STORAGE_PORT=6819 \ - PARTITION_NAME=docker + PARTITION_NAME=docker \ + NFS_SERVER=nfs-server \ + NFS_SERVER_DIRS=/nfs/secret:/nfs/home:/nfs/modules:/nfs/modulefiles \ + NFS_CLIENT_DIRS=/secret:/home:/opt/apps/Linux:/opt/apps/modulefiles/Linux # clean up RUN rm -f /packages/slurm-*.rpm /packages/openmpi-*.rpm \ diff --git a/controller/docker-entrypoint.sh b/controller/docker-entrypoint.sh index 0bcf241..07eb2d8 100755 --- a/controller/docker-entrypoint.sh +++ b/controller/docker-entrypoint.sh @@ -1,6 +1,39 @@ #!/usr/bin/env bash set -e +_init_fstab() { + if [[ ! -f /etc/fstab ]]; then + cat > /etc/fstab << EOF +### : 0 0 +EOF + fi +} + +_export_nfs_mounts() { + IFS=':' read -r -a MNT_SERVER_ARRAY <<< "$NFS_SERVER_DIRS" + IFS=':' read -r -a MNT_CLIENT_ARRAY <<< "$NFS_CLIENT_DIRS" + for i in "${!MNT_CLIENT_ARRAY[@]}"; do + if [[ ! -d ${MNT_CLIENT_ARRAY[$i]} ]]; then + mkdir -p ${MNT_CLIENT_ARRAY[$i]} + fi + cat >> /etc/fstab <&1 >/dev/null)$? ]; do + echo -n "." + sleep 2 + done +} + # start sshd server _sshd_host() { if [ ! -d /var/run/sshd ]; then @@ -193,6 +226,14 @@ _slurmctld() { } ### main ### +_start_nfs_services +_init_fstab +_export_nfs_mounts +mount -a + +rpcinfo -p $NFS_SERVER +showmount -e $NFS_SERVER + _sshd_host _ssh_worker _munge_start diff --git a/database/Dockerfile b/database/Dockerfile index 8763ed5..315bc98 100644 --- a/database/Dockerfile +++ b/database/Dockerfile @@ -7,7 +7,10 @@ ENV DBD_ADDR=database \ STORAGE_HOST=database.local.dev \ STORAGE_PORT=3306 \ STORAGE_PASS=password \ - STORAGE_USER=slurm + STORAGE_USER=slurm \ + NFS_SERVER=nfs-server \ + NFS_SERVER_DIRS=/nfs/secret:/nfs/home:/nfs/modules:/nfs/modulefiles \ + NFS_CLIENT_DIRS=/secret:/home:/opt/apps/Linux:/opt/apps/modulefiles/Linux # clean up RUN rm -f /packages/slurm-*.rpm /packages/openmpi-*.rpm \ diff --git a/database/docker-entrypoint.sh b/database/docker-entrypoint.sh index 81cd307..e81acc2 100755 --- a/database/docker-entrypoint.sh +++ b/database/docker-entrypoint.sh @@ -3,6 +3,39 @@ set -e SLURM_ACCT_DB_SQL=/slurm_acct_db.sql +_init_fstab() { + if [[ ! -f /etc/fstab ]]; then + cat > /etc/fstab << EOF +### : 0 0 +EOF + fi +} + +_export_nfs_mounts() { + IFS=':' read -r -a MNT_SERVER_ARRAY <<< "$NFS_SERVER_DIRS" + IFS=':' read -r -a MNT_CLIENT_ARRAY <<< "$NFS_CLIENT_DIRS" + for i in "${!MNT_CLIENT_ARRAY[@]}"; do + if [[ ! -d ${MNT_CLIENT_ARRAY[$i]} ]]; then + mkdir -p ${MNT_CLIENT_ARRAY[$i]} + fi + cat >> /etc/fstab <&1 >/dev/null)$? ]; do + echo -n "." + sleep 2 + done +} + # start sshd server _sshd_host() { if [ ! -d /var/run/sshd ]; then @@ -130,6 +163,14 @@ _slurmdbd() { } ### main ### +_start_nfs_services +_init_fstab +_export_nfs_mounts +mount -a + +rpcinfo -p $NFS_SERVER +showmount -e $NFS_SERVER + _sshd_host _mariadb_start _munge_start_using_key diff --git a/docker-compose-lmod-nfs.yml b/docker-compose-lmod-nfs.yml new file mode 100644 index 0000000..038d44d --- /dev/null +++ b/docker-compose-lmod-nfs.yml @@ -0,0 +1,117 @@ +version: '3.1' + +services: + nfs-server: + build: + context: ./nfs-server + dockerfile: Dockerfile + image: nfs-server + container_name: nfs-server + privileged: true + hostname: nfs-server.local.dev + networks: + - slurm + environment: + RPCNFSDCOUNT: 16 + NFS_SERVER_DIRS: '/nfs/secret:/nfs/home:/nfs/modules:/nfs/modulefiles' + volumes: + - nfs-vol:/nfs + + controller: + build: + context: ./controller + dockerfile: Dockerfile + image: scidas/slurm.controller:17.11.5 + container_name: controller + privileged: true + restart: always + hostname: controller.local.dev + networks: + - slurm + environment: + USE_SLURMDBD: 'true' + CLUSTER_NAME: snowflake + CONTROL_MACHINE: controller + SLURMCTLD_PORT: 6817 + SLURMD_PORT: 6818 + ACCOUNTING_STORAGE_HOST: database + ACCOUNTING_STORAGE_PORT: 6819 + COMPUTE_NODES: worker01 worker02 + PARTITION_NAME: docker + NFS_SERVER: nfs-server + NFS_SERVER_DIRS: '/nfs/secret:/nfs/home:/nfs/modules:/nfs/modulefiles' + NFS_CLIENT_DIRS: '/.secret:/home:/opt/apps/Linux:/opt/apps/modulefiles/Linux' + + database: + build: + context: ./database + dockerfile: Dockerfile + image: scidas/slurm.database:17.11.5 + depends_on: + - controller + container_name: database + privileged: true + restart: always + hostname: database.local.dev + networks: + - slurm + environment: + DBD_ADDR: database + DBD_HOST: database + DBD_PORT: 6819 + STORAGE_HOST: database.local.dev + STORAGE_PORT: 3306 + STORAGE_PASS: password + STORAGE_USER: slurm + NFS_SERVER: nfs-server + NFS_SERVER_DIRS: '/nfs/secret:/nfs/home:/nfs/modules:/nfs/modulefiles' + NFS_CLIENT_DIRS: '/.secret:/home:/opt/apps/Linux:/opt/apps/modulefiles/Linux' + + worker01: + build: + context: ./worker + dockerfile: Dockerfile + image: scidas/slurm.worker:17.11.5 + depends_on: + - controller + container_name: worker01 + privileged: true + restart: always + hostname: worker01.local.dev + networks: + - slurm + environment: + CONTROL_MACHINE: controller + ACCOUNTING_STORAGE_HOST: database + COMPUTE_NODES: worker01 worker02 + NFS_SERVER: nfs-server + NFS_SERVER_DIRS: '/nfs/secret:/nfs/home:/nfs/modules:/nfs/modulefiles' + NFS_CLIENT_DIRS: '/.secret:/home:/opt/apps/Linux:/opt/apps/modulefiles/Linux' + + worker02: + build: + context: ./worker + dockerfile: Dockerfile + image: scidas/slurm.worker:17.11.5 + depends_on: + - controller + container_name: worker02 + privileged: true + restart: always + hostname: worker02.local.dev + networks: + - slurm + environment: + CONTROL_MACHINE: controller + ACCOUNTING_STORAGE_HOST: database + COMPUTE_NODES: worker01 worker02 + NFS_SERVER: nfs-server + NFS_SERVER_DIRS: '/nfs/secret:/nfs/home:/nfs/modules:/nfs/modulefiles' + NFS_CLIENT_DIRS: '/.secret:/home:/opt/apps/Linux:/opt/apps/modulefiles/Linux' + +networks: + slurm: + +volumes: + nfs-vol: + external: true diff --git a/nfs-server/Dockerfile b/nfs-server/Dockerfile new file mode 100644 index 0000000..713fd8c --- /dev/null +++ b/nfs-server/Dockerfile @@ -0,0 +1,17 @@ +FROM krallin/centos-tini:7 +MAINTAINER Michael J. Stealey + +RUN rpmkeys --import file:///etc/pki/rpm-gpg/RPM-GPG-KEY-CentOS-7 && \ + yum -y --setopt=tsflags=nodocs install nfs-utils && \ + mkdir -p /nfs && \ + yum clean all + +ENV RPCNFSDCOUNT=8 \ + NFS_SERVER_DIRS='/nfs/share' + +COPY docker-entrypoint.sh /docker-entrypoint.sh + +VOLUME ["/nfs"] +EXPOSE 111/udp 2049/tcp + +ENTRYPOINT ["/usr/local/bin/tini", "--", "/docker-entrypoint.sh"] diff --git a/nfs-server/docker-entrypoint.sh b/nfs-server/docker-entrypoint.sh new file mode 100755 index 0000000..13729b7 --- /dev/null +++ b/nfs-server/docker-entrypoint.sh @@ -0,0 +1,93 @@ +#!/bin/bash +set -e + +_start_nfs() { + exportfs -a + rpcbind + rpc.statd + rpc.nfsd + rpc.mountd +} + +_nfs_server_mounts() { + IFS=':' read -r -a MNT_SERVER_ARRAY <<< "$NFS_SERVER_DIRS" + for server_mnt in "${MNT_SERVER_ARRAY[@]}"; do + if [[ ! -d $server_mnt ]]; then + mkdir -p $server_mnt + fi + chmod -R 777 $server_mnt + cat >> /etc/exports < /etc/sysconfig/nfs < /etc/fstab << EOF +### : 0 0 +EOF + fi +} + +_export_nfs_mounts() { + IFS=':' read -r -a MNT_SERVER_ARRAY <<< "$NFS_SERVER_DIRS" + IFS=':' read -r -a MNT_CLIENT_ARRAY <<< "$NFS_CLIENT_DIRS" + for i in "${!MNT_CLIENT_ARRAY[@]}"; do + if [[ ! -d ${MNT_CLIENT_ARRAY[$i]} ]]; then + mkdir -p ${MNT_CLIENT_ARRAY[$i]} + fi + cat >> /etc/fstab <&1 >/dev/null)$? ]; do + echo -n "." + sleep 2 + done +} + # start sshd server _sshd_host() { if [ ! -d /var/run/sshd ]; then @@ -61,6 +94,14 @@ _slurmd() { } ### main ### +_start_nfs_services +_init_fstab +_export_nfs_mounts +mount -a + +rpcinfo -p $NFS_SERVER +showmount -e $NFS_SERVER + _sshd_host _munge_start_using_key _wait_for_worker