forked from OleHolmNielsen/Slurm_tools
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathshowjob
executable file
·140 lines (128 loc) · 4.2 KB
/
showjob
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
#!/usr/bin/env bash
# Show status of Slurm job(s).
# Both queue information and accounting information is printed.
# Homepage: https://github.com/OleHolmNielsen/Slurm_tools/
# Exactly one command-line argument is expected: the job ID to report on.
if [[ $# -ne 1 ]]; then
  echo "Usage: showjob <jobid>"
  exit 1
fi
jobid="$1"
###########################################################################
# Get queue information
#
# Uses $jobid (set above).  When squeue accepts the job ID, this section
# prints the pending reason (with extra limit details for two association
# limits), the "scontrol show job" record, and any "mail-user" lines found
# in the job script.  Otherwise it reports that the job is not in the queue.

#######################################
# Convert a time limit string as printed by 'squeue -o "%l"' to minutes.
# Accepted forms: days-hours:minutes:seconds, hours:minutes:seconds,
# minutes:seconds and minutes.  Seconds are discarded.
# Arguments: $1 - time limit string
# Outputs:   number of whole minutes on stdout
# Returns:   0 on success, 1 when the string is empty or not numeric
#            (for example UNLIMITED or NOT_SET)
#######################################
timelimit_to_minutes() {
  local limit=$1 days=0 hours=0 minutes=0 clock value
  local -a fields=()

  [[ -n "$limit" ]] || return 1
  clock=$limit
  if [[ "$limit" == *-* ]]; then
    days=${limit%%-*}
    clock=${limit#*-}
  fi
  IFS=: read -r -a fields <<< "$clock"
  (( ${#fields[@]} <= 3 )) || return 1

  if [[ "$limit" == *-* || ${#fields[@]} -eq 3 ]]; then
    # [days-]hours[:minutes[:seconds]]
    hours=${fields[0]:-0}
    minutes=${fields[1]:-0}
  else
    # minutes[:seconds]
    minutes=${fields[0]:-0}
  fi

  for value in "$days" "$hours" "$minutes"; do
    [[ "$value" =~ ^[0-9]+$ ]] || return 1
  done
  # Force base 10 so zero-padded fields such as "08" are not read as octal.
  echo $(( 10#$days * 24 * 60 + 10#$hours * 60 + 10#$minutes ))
}

# The exit status of squeue decides whether the job is treated as queued.
if squeue -ho "%i" -j "$jobid" >/dev/null 2>&1; then
  reason=$(squeue -ho "%r" -j "$jobid")
  # Display job reason code (if there is one, and it is other than "None").
  # An empty reason means squeue printed nothing for this job, so there is
  # nothing meaningful to report.
  if [[ -n "$reason" && "$reason" != "None" ]]; then
    jobuser=$(squeue -ho "%u" -j "$jobid")
    jobaccount=$(squeue -ho "%a" -j "$jobid")
    jobstate=$(squeue -ho "%T" -j "$jobid")
    echo "Job $jobid of user $jobuser in account $jobaccount has a jobstate=$jobstate with reason=$reason"
    case "$reason" in
      AssocGrpCPURunMinutesLimit)
        echo
        echo "Information about AssocGrpCPURunMinutesLimit:"
        GrpTRESRunMins=$(sacctmgr -snr show user "$jobuser" accounts="$jobaccount" format=GrpTRESRunMins \
          | awk '{print $1}')
        if [[ -n "$GrpTRESRunMins" ]]; then
          echo "User GrpTRESRunMins limit is: $GrpTRESRunMins"
        else
          # No user value, get parent account value
          GrpTRESRunMins=$(sacctmgr -snr show association accounts="$jobaccount" format=GrpTRESRunMins \
            | awk 'NR==1{print $1}')
          echo "Account GrpTRESRunMins limit is: $GrpTRESRunMins"
        fi
        printf '%s' "Current user TRESRunMins is: "
        # Print the first comma-separated item of field 11 of the sshare output
        sshare -hUlp -u "$jobuser" -A "$jobaccount" \
          | awk -F'|' '{split($11,array,","); print array[1]}'
        NCPUS=$(squeue -ho "%C" -j "$jobid")
        timelimit=$(squeue -ho "%l" -j "$jobid")
        # Only do the arithmetic when both values are plain numbers.
        if [[ "$NCPUS" =~ ^[0-9]+$ ]] \
          && timelimit_minutes=$(timelimit_to_minutes "$timelimit"); then
          echo "This job requires TRESRunMins: cpu=$((NCPUS * timelimit_minutes))"
        else
          echo "This job requires TRESRunMins: unknown (CPUs=$NCPUS, time limit=$timelimit)"
        fi
        ;;
      AssocGrpCpuLimit)
        echo
        echo "Information about AssocGrpCpuLimit:"
        GrpTRES=$(sacctmgr -snr show user "$jobuser" accounts="$jobaccount" format=GrpTRES \
          | awk '{print $1}')
        if [[ -n "$GrpTRES" ]]; then
          echo "User GrpTRES limit is: $GrpTRES"
        else
          # No user value, get parent account value
          GrpTRES=$(sacctmgr -snr show association accounts="$jobaccount" format=GrpTRES \
            | awk 'NR==1{print $1}')
          echo "Account GrpTRES limit is: $GrpTRES"
        fi
        printf '%s' "Current user TRES is: "
        # Count number of CPUs for running jobs
        squeue -ho "%C" -u "$jobuser" -t running \
          | awk '{ncpus+=$1}END{print "cpu=" ncpus}'
        NCPUS=$(squeue -ho "%C" -j "$jobid")
        echo "This job requires TRES: cpu=$NCPUS"
        ;;
    esac
    echo
  fi

  # Job found in the queue
  echo "Queued job information:"
  # Keep the record in memory instead of a predictable file name in /tmp.
  jobinfo=$(scontrol show job "$jobid")
  if [[ -n "$jobinfo" ]]; then
    printf '%s\n' "$jobinfo"
  fi

  # Parse the job status information (read E-mail).
  # Tolerate any amount of indentation in front of "Command=".
  jobscript=$(sed -n 's/^[[:space:]]*Command=//p' <<< "$jobinfo" | head -n 1)
  export jobscript
  # An empty value must be skipped: grep without a file name would read stdin.
  if [[ -n "$jobscript" && "$jobscript" != "(null)" ]]; then
    # NOTE(review): the Command= value may be followed by script arguments;
    # if the full value is not a file, fall back to its first word.
    [[ -f "$jobscript" ]] || jobscript=${jobscript%% *}
    if [[ -f "$jobscript" && -r "$jobscript" ]]; then
      mailinfo=$(grep -- "mail-user" "$jobscript")
      if [[ -n "$mailinfo" ]]; then
        echo "Job script E-mail has been set:"
        printf '%s\n' "$mailinfo"
      fi
    fi
  fi
else
  echo "The job $jobid is not in the current Slurm queue."
fi
###########################################################################
# Execute sacct and print nicely formatted output.
# Arguments: all arguments are passed through to sacct unchanged
# Outputs:   a table on stdout with right-aligned columns: the first line
#            of the sacct output as header, a dashed rule as wide as each
#            column, then the remaining lines
format_sacct() {
  # Add -P flag for parseable output with fields separated by "|".
  # "$@" is quoted so that each argument reaches sacct as a single word.
  sacct -P "$@" | awk -F "|" '
    {
      # Remember every cell and track the widest value seen per column.
      # The cell[row, col] form works in any POSIX awk.
      for (i = 1; i <= NF; i++) {
        cell[NR, i] = $i
        if (length($i) > width[i]) width[i] = length($i)
      }
      ncols = NF
      nrows = NR
    }
    END {
      # Header: the first input line.  The format string is assembled by
      # concatenation so that no dynamic "*" width is needed.
      for (i = 1; i <= ncols; i++) printf("%" width[i] "s ", cell[1, i])
      printf("\n")
      # Rule: one dash per character of column width, however wide.
      for (i = 1; i <= ncols; i++) {
        rule = ""
        for (k = 0; k < width[i]; k++) rule = rule "-"
        printf("%s ", rule)
      }
      printf("\n")
      # Data lines
      for (r = 2; r <= nrows; r++) {
        for (i = 1; i <= ncols; i++) printf("%" width[i] "s ", cell[r, i])
        printf("\n")
      }
    }'
}
###########################################################################
# Get job accounting information
#
# Prints two tables for $jobid through format_sacct: first the job
# parameters, then the job timing and resource details.  Expansions are
# quoted so the job ID and field list are each passed as a single word.
echo
echo "Accounting information from the Slurm database:"
echo
echo "Job parameters for jobid $jobid:"
export jobvars="jobid,jobname,user,account,partition,Timelimit"
format_sacct -j "$jobid" -o "$jobvars"
echo
echo "Job details information for jobid $jobid:"
export jobvars="jobid,Start,elapsed,End,CPUTime,NNodes,NCPUS,ExitCode,nodelist"
format_sacct -j "$jobid" -o "$jobvars"