-
Notifications
You must be signed in to change notification settings - Fork 0
/
run.slurm
57 lines (51 loc) · 1.34 KB
/
run.slurm
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
#!/bin/bash
# Slurm batch script that launches `python main.py` job steps with srun.
#
# sbatch only honours lines that start with exactly "#SBATCH" and that appear
# before the first executable command. A line written as "# SBATCH" (with a
# space) is an ordinary comment, which is how alternatives are kept disabled.

# Name shown in the queue.
#SBATCH --job-name=gen_RNN
# Send an e-mail notification when the job ends.
#SBATCH --mail-type=END
# NOTE(review): this directive had lost its option name and its address (it
# read "[email protected]", which is not an sbatch option). It was presumably
# --mail-user. It is kept disabled here; without --mail-user Slurm notifies
# the submitting user. Put the real address back and remove the space after
# "#" to re-enable it.
# SBATCH --mail-user=user@example.com
# Account and partition the job is submitted to.
#SBATCH --account=deepthought
#SBATCH --partition=gpu-2080ti
# SBATCH --partition=ckpt
# Three nodes, each with 20G of memory and one GPU (--mem and --gres are
# per-node requests). Presumably one node per entry of the models array
# defined in the script body; keep the two in sync.
#SBATCH --nodes=3
#SBATCH --mem=20G
#SBATCH --gres=gpu:1
# Run from the current directory and write the job log to
# slurm_out/slurm-<jobid>.out (%j expands to the job id). Create slurm_out/
# before submitting; Slurm is not expected to create it.
#SBATCH --chdir=.
#SBATCH --output=./slurm_out/slurm-%j.out
# ---- Experiment configuration -------------------------------------------
# Settings are switched by swapping which of the paired lines is commented.

# TEST_ONLY=true  : only launch the --test step for each model.
# TEST_ONLY=false : train each model first, then launch the --test step.
# TEST_ONLY=false
TEST_ONLY=true

# Passed to main.py as --runner; also selects the hyperparameters below.
RUN="DP"
# RUN="MNIST"

# Run id given to the first model; each following model gets the next integer.
RUNID=10
# One job step is launched per entry; the entry is passed as --model when
# training.
declare -a models=("SR" "SO_SC" "SO_FR")
# declare -a run_ids=("4")
# declare -a models=("SR")

# Training hyperparameters per experiment (passed as --hid_dim / --nepochs).
# An unrecognised RUN aborts here instead of continuing with both values
# unset, which would hand main.py a malformed argument list.
case "$RUN" in
  DP)
    echo "DP experiment"
    hid_dim=1000
    nepochs=1000
    ;;
  MNIST)
    echo "MNIST experiment"
    hid_dim=20000
    nepochs=1000
    ;;
  *)
    echo "Unknown RUN value '$RUN' (expected DP or MNIST)" >&2
    exit 1
    ;;
esac
#######################################
# Launch one srun job step per model in the background, then wait for every
# step and record the ones that exited non-zero.
# Model number k (0-based index into models) always uses run id RUNID + k, so
# the train and test phases address the same run without mutating RUNID.
# Globals:   RUN, RUNID, models (read); hid_dim, nepochs (read when training);
#            step_failed (read and written, indexed like models)
# Arguments: $1 - "train" to train the models, anything else to run --test
# Outputs:   one line on stderr for each failed step
# Returns:   0 always; failures are reported through step_failed
#######################################
run_phase() {
  local phase=$1
  local -a pids=()
  local -a cmd
  local k

  for k in "${!models[@]}"; do
    # A model that already failed an earlier phase is not run again, so a
    # failed training is never followed by a test of that run.
    if [ -n "${step_failed[$k]:-}" ]; then
      continue
    fi
    cmd=(srun --ntasks=1 --gres=gpu:1 python main.py
      --runner "$RUN" --run_id "$((RUNID + k))")
    if [ "$phase" = train ]; then
      cmd+=(--hid_dim "$hid_dim" --nepochs "$nepochs"
        --model "${models[$k]}" --nonlin relu)
    else
      cmd+=(--test)
    fi
    "${cmd[@]}" &
    pids[$k]=$!
  done

  # Wait on each PID individually: a bare `wait` would discard the exit
  # status of the steps.
  for k in "${!pids[@]}"; do
    if ! wait "${pids[$k]}"; then
      echo "$phase step failed for model ${models[$k]} (run_id $((RUNID + k)))" >&2
      step_failed[$k]=1
    fi
  done
  return 0
}

# Indices of models with a failed step; filled in by run_phase.
declare -a step_failed=()

if [ "$TEST_ONLY" = true ]; then
  echo "TEST_ONLY"
else
  echo "TRAINING AND TESTING"
  # All training steps finish before any test step starts.
  run_phase train
fi
run_phase test

# Make the batch job itself fail when any step failed, so Slurm does not
# report the job as successful.
if [ "${#step_failed[@]}" -gt 0 ]; then
  exit 1
fi