Skip to content

Commit

Permalink
BLD: Fix slurm CI/CD workflow (xorbitsai#801)
Browse files Browse the repository at this point in the history
Co-authored-by: Dawnfz-lenfeng <[email protected]>
  • Loading branch information
Dawnfz-Lenfeng and Dawnfz-lenfeng authored Aug 20, 2024
1 parent 310a866 commit 221387f
Show file tree
Hide file tree
Showing 5 changed files with 16 additions and 14 deletions.
7 changes: 6 additions & 1 deletion CI/slurm/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,2 +1,7 @@
# Slurm CI image: the dask-jobqueue Slurm base image with xorbits added.
FROM daskdev/dask-jobqueue:slurm

# --no-cache-dir keeps pip's download cache out of the image layer.
RUN pip install --no-cache-dir xorbits

# Run the remaining RUN instructions with bash instead of the default /bin/sh.
SHELL ["/bin/bash", "-c"]

# Activate the dask-jobqueue conda env in bash sessions that read ~/.bashrc.
RUN echo "source /opt/anaconda/bin/activate dask-jobqueue" >> ~/.bashrc

# Put the env's bin directory first on PATH so its executables are also found
# by commands that never read ~/.bashrc (e.g. `docker exec <container> python`).
ENV PATH=/opt/anaconda/envs/dask-jobqueue/bin:$PATH
12 changes: 4 additions & 8 deletions CI/slurm/docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,7 @@ services:
common-network:

slurmdbd:
image: daskdev/dask-jobqueue:slurm
build: .
image: slurmbase
command: ["slurmdbd"]
container_name: slurmdbd
hostname: slurmdbd
Expand All @@ -33,8 +32,7 @@ services:
common-network:

slurmctld:
image: daskdev/dask-jobqueue:slurm
build: .
image: slurmbase
command: ["slurmctld"]
container_name: slurmctld
hostname: slurmctld
Expand All @@ -58,8 +56,7 @@ services:
- NET_ADMIN

c1:
image: daskdev/dask-jobqueue:slurm
build: .
image: slurmbase
command: ["slurmd"]
hostname: c1
container_name: c1
Expand All @@ -81,8 +78,7 @@ services:
- NET_ADMIN

c2:
image: daskdev/dask-jobqueue:slurm
build: .
image: slurmbase
command: ["slurmd"]
hostname: c2
container_name: c2
Expand Down
2 changes: 1 addition & 1 deletion CI/slurm/register_cluster.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@
set -e

docker exec slurmctld bash -c "/usr/bin/sacctmgr --immediate add cluster name=linux" && \
docker-compose restart slurmdbd slurmctld
docker compose restart slurmdbd slurmctld
7 changes: 4 additions & 3 deletions CI/slurm/slurm.sh
Original file line number Diff line number Diff line change
Expand Up @@ -15,15 +15,15 @@

function jobqueue_before_install {
docker version
docker-compose version
docker compose version

# start slurm cluster
cd ./CI/slurm
docker-compose pull
docker build -t slurmbase .
./start-slurm.sh
cd -

#Set shared space permissions
# set shared space permissions
docker exec slurmctld /bin/bash -c "chmod -R 777 /shared_space"

docker ps -a
Expand All @@ -35,6 +35,7 @@ function show_network_interfaces {
for c in slurmctld c1 c2; do
echo '------------------------------------------------------------'
echo docker container: $c
docker exec $c pip install psutil
docker exec $c python -c 'import psutil; print(psutil.net_if_addrs().keys())'
echo '------------------------------------------------------------'
done
Expand Down
2 changes: 1 addition & 1 deletion CI/slurm/start-slurm.sh
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
docker-compose up -d --no-build
docker compose up -d --no-build

while [ `./register_cluster.sh 2>&1 | grep "sacctmgr: error" | wc -l` -ne 0 ]
do
Expand Down

0 comments on commit 221387f

Please sign in to comment.