forked from related-sciences/gce-github-runner
-
Notifications
You must be signed in to change notification settings - Fork 0
/
action.sh
executable file
·347 lines (324 loc) · 10.9 KB
/
action.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
#!/usr/bin/env bash
# Absolute path of the directory containing this script; used to locate the
# vendored helpers. Every expansion is quoted so a checkout path containing
# spaces or glob characters still resolves, and a failed lookup aborts instead
# of leaving ACTION_DIR empty.
ACTION_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" >/dev/null 2>&1 && pwd)" || {
  echo "Unable to determine the directory containing ${BASH_SOURCE[0]}" >&2
  exit 1
}
#######################################
# Prints a one-line usage summary.
# Only `start` is advertised because it is the only command the dispatcher
# at the bottom of this script accepts.
# Outputs: writes the usage line to stdout
#######################################
function usage {
  echo "Usage: ${0} --command=start <arguments>"
}
# Turns on the strict shell options: errexit (-e), noclobber (-C),
# nounset (-u) and pipefail.
safety_on() {
  set -eCu -o pipefail
}
# Turns off the strict shell options enabled by safety_on: errexit (-e),
# noclobber (-C), nounset (-u) and pipefail.
safety_off() {
  set +eCu +o pipefail
}
# Load the vendored long-option parser used by the parsing loop below.
source "${ACTION_DIR}/vendor/getopts_long.sh"

# Every supported option starts out as an empty string so later code can test
# it with -z / -n regardless of which options were passed.

# Command selection and credentials.
command='' token='' project_id='' service_account_key=''

# Runner release and machine shape.
runner_ver='' machine_zone='' machine_type='' boot_disk_type='' disk_size=''

# Identity, image and networking of the VM.
runner_service_account='' image_project='' image='' image_family=''
network='' scopes='' shutdown_timeout='' subnet=''

# Behaviour switches.
preemptible='' ephemeral='' no_external_address='' actions_preinstalled=''
maintenance_policy_terminate='' arm='' accelerator=''

# Index at which the long-option parser starts reading arguments.
OPTLIND=1
# Parse the long options. Each option name is identical to the shell variable
# that stores its value, so the arms below assign through "$opt" instead of
# spelling out one arm per option.
while getopts_long :h opt \
  command required_argument \
  token required_argument \
  project_id required_argument \
  service_account_key required_argument \
  runner_ver required_argument \
  machine_zone required_argument \
  machine_type required_argument \
  boot_disk_type optional_argument \
  disk_size optional_argument \
  runner_service_account optional_argument \
  image_project optional_argument \
  image optional_argument \
  image_family optional_argument \
  network optional_argument \
  scopes required_argument \
  shutdown_timeout required_argument \
  subnet optional_argument \
  preemptible required_argument \
  ephemeral required_argument \
  no_external_address required_argument \
  actions_preinstalled required_argument \
  arm required_argument \
  maintenance_policy_terminate optional_argument \
  accelerator optional_argument \
  help no_argument "" "$@"
do
  case "$opt" in
    # Options whose value is taken verbatim from OPTLARG.
    command | token | project_id | service_account_key | runner_ver | \
    machine_zone | machine_type | scopes | shutdown_timeout | preemptible | \
    ephemeral | no_external_address | actions_preinstalled | arm | accelerator)
      printf -v "$opt" '%s' "$OPTLARG"
      ;;
    # Options that keep their current value when OPTLARG is unset.
    boot_disk_type | disk_size | runner_service_account | image_project | \
    image | image_family | network | subnet | maintenance_policy_terminate)
      printf -v "$opt" '%s' "${OPTLARG-${!opt}}"
      ;;
    h | help)
      usage
      exit 0
      ;;
    # Parser-reported error: print it with the script's base name and bail out.
    :)
      printf >&2 '%s: %s\n' "${0##*/}" "$OPTLERR"
      usage
      exit 1
      ;;
  esac
done
#######################################
# Activates the service account in gcloud using the key held in
# service_account_key.
# Globals:   service_account_key (read), project_id (read)
# Outputs:   success message on stdout, failure message on stderr
# Returns:   0 on success, 1 when gcloud rejects the key
#######################################
function gcloud_auth {
  # NOTE: when --project is specified, it updates the config
  # The key is passed on stdin (--key-file -). It is quoted so that whitespace
  # and glob characters inside the key reach gcloud unchanged. gcloud's own
  # output is discarded, so a failure is reported explicitly here.
  if ! printf '%s\n' "${service_account_key}" \
    | gcloud --project "${project_id}" --quiet auth activate-service-account --key-file - &>/dev/null; then
    echo "❌ Failed to configure gcloud with the provided service account key." >&2
    return 1
  fi
  echo "✅ Successfully configured gcloud."
}
#######################################
# Creates a GCE VM that registers itself as a self-hosted GitHub Actions
# runner, then waits until the VM labels itself gh_ready=1.
#
# Steps:
#   1. Optionally authenticate gcloud (needs service_account_key + project_id).
#   2. Request a runner registration token from the GitHub API.
#   3. Translate the parsed options into `gcloud compute instances create` flags.
#   4. Create the VM. With an accelerator, candidate zones are tried in random
#      order until one succeeds; otherwise machine_zone is used as given.
#   5. Write `label=<vm id>` to $GITHUB_OUTPUT and poll the gh_ready label.
#
# Globals:   reads the option variables filled in by the option parser and
#            GITHUB_REPOSITORY, GITHUB_REPOSITORY_OWNER, GITHUB_RUN_ID,
#            GITHUB_RUN_ATTEMPT, GITHUB_OUTPUT; writes RUNNER_TOKEN, VM_ID,
#            machine_zone, runner_ver, accelerator, startup_script, GH_READY.
# Outputs:   progress on stdout, failures on stderr.
# Returns:   exits 1 when the token cannot be obtained, the VM cannot be
#            created or never becomes ready; exits 2 when the latest runner
#            version cannot be resolved.
#
# NOTE: create_vm calls safety_off and nothing turns the strict options back
# on, so everything after the first create_vm call runs without
# errexit/nounset. Failures there are checked explicitly.
#######################################
function start_vm {
  echo "Starting GCE VM ..."
  if [[ -z "${service_account_key}" ]] || [[ -z "${project_id}" ]]; then
    echo "Won't authenticate gcloud. If you wish to authenticate gcloud provide both service_account_key and project_id."
  else
    echo "Will authenticate gcloud."
    gcloud_auth
  fi

  RUNNER_TOKEN=$(curl -S -s -XPOST \
    -H "authorization: Bearer ${token}" \
    "https://api.github.com/repos/${GITHUB_REPOSITORY}/actions/runners/registration-token" \
    | jq -r .token)
  # jq prints the string "null" when the response has no .token field (for
  # example on an authorization error). Fail now instead of booting a VM
  # that could never register.
  if [[ -z "${RUNNER_TOKEN}" || "${RUNNER_TOKEN}" == "null" ]]; then
    echo "❌ Could not get a GitHub Runner registration token for ${GITHUB_REPOSITORY}" >&2
    exit 1
  fi
  echo "✅ Successfully got the GitHub Runner registration token"

  VM_ID="gce-gh-runner-${GITHUB_RUN_ID}-${GITHUB_RUN_ATTEMPT}"

  # Optional flags: each expands to nothing when its option is empty.
  service_account_flag=${runner_service_account:+--service-account=${runner_service_account}}
  image_project_flag=${image_project:+--image-project=${image_project}}
  image_flag=${image:+--image=${image}}
  image_family_flag=${image_family:+--image-family=${image_family}}
  disk_size_flag=${disk_size:+--boot-disk-size=${disk_size}}
  boot_disk_type_flag=${boot_disk_type:+--boot-disk-type=${boot_disk_type}}
  network_flag=${network:+--network=${network}}
  subnet_flag=${subnet:+--subnet=${subnet}}
  maintenance_policy_flag=${maintenance_policy_terminate:+--maintenance-policy=TERMINATE}

  # Boolean switches: only the literal string "true" enables them.
  preemptible_flag=""
  if [[ "${preemptible}" == "true" ]]; then
    preemptible_flag="--preemptible"
  fi
  ephemeral_flag=""
  if [[ "${ephemeral}" == "true" ]]; then
    ephemeral_flag="--ephemeral"
  fi
  no_external_address_flag=""
  if [[ "${no_external_address}" == "true" ]]; then
    no_external_address_flag="--no-address"
  fi

  # accel_only is the second field of the accelerator spec when split on
  # '=' and ','; it is used below to look up zones offering that accelerator.
  accel_only=$(awk -F'[=,]' '{print $2}' <<< "${accelerator}")
  # From here on `accelerator` holds the gcloud flags (two words) rather than
  # the raw option value.
  if [[ -n "${accelerator}" ]]; then
    accelerator="--accelerator=${accelerator} --maintenance-policy=TERMINATE"
  fi

  echo "The new GCE VM will be ${VM_ID}"

  if [[ "$runner_ver" = "latest" ]]; then
    latest_ver=$(curl -sL https://api.github.com/repos/actions/runner/releases/latest | jq -r '.tag_name' | sed -e 's/^v//')
    # Validate before announcing success.
    if [[ -z "$latest_ver" || "null" == "$latest_ver" ]]; then
      echo "❌ could not retrieve the latest version of a runner"
      exit 2
    fi
    runner_ver="$latest_ver"
    echo "✅ runner_ver=latest is specified. v$latest_ver is detected as the latest version."
  fi

  # GCE VM label values requirements:
  # - can contain only lowercase letters, numeric characters, underscores, and dashes
  # - have a maximum length of 63 characters
  # ref: https://cloud.google.com/compute/docs/labeling-resources#requirements
  #
  # Github's requirements:
  # - username/organization name
  #   - Max length: 39 characters
  #   - All characters must be either a hyphen (-) or alphanumeric
  # - repository name
  #   - Max length: 100 code points
  #   - All code points must be either a hyphen (-), an underscore (_), a period (.),
  #     or an ASCII alphanumeric code point
  # ref: https://github.com/dead-claudia/github-limits
  function truncate_to_label {
    local in="${1}"
    in="${in:0:63}"                              # ensure max length
    in="${in//./_}"                              # replace '.' with '_'
    in=$(tr '[:upper:]' '[:lower:]' <<< "${in}") # convert to lower
    echo -n "${in}"
  }
  gh_repo_owner="$(truncate_to_label "${GITHUB_REPOSITORY_OWNER}")"
  gh_repo="$(truncate_to_label "${GITHUB_REPOSITORY##*/}")"
  gh_run_id="${GITHUB_RUN_ID}"

  # Renders the VM startup script into the global `startup_script`.
  # It must be called after machine_zone is final for the attempt, because the
  # script embeds the zone in its self-delete commands. The string content is
  # kept at column 0 on purpose: it is shipped verbatim to the VM.
  function build_startup_script {
    startup_script="
#!/bin/bash
mkdir /actions-runner
cd /actions-runner
curl -o actions-runner-linux-x64-${runner_ver}.tar.gz -L https://github.com/actions/runner/releases/download/v${runner_ver}/actions-runner-linux-x64-${runner_ver}.tar.gz
tar xzf ./actions-runner-linux-x64-${runner_ver}.tar.gz
./bin/installdependencies.sh && \\
# Create a systemd service in charge of shutting down the machine once the workflow has finished
cat <<-EOF > /etc/systemd/system/shutdown.sh
#!/bin/sh
sleep ${shutdown_timeout}
gcloud compute instances delete $VM_ID --zone=$machine_zone --quiet
EOF
cat <<-EOF > /etc/systemd/system/shutdown.service
[Unit]
Description=Shutdown service
[Service]
ExecStart=/etc/systemd/system/shutdown.sh
[Install]
WantedBy=multi-user.target
EOF
chmod +x /etc/systemd/system/shutdown.sh
systemctl daemon-reload
systemctl enable shutdown.service
cat <<-EOF > /usr/bin/gce_runner_shutdown.sh
#!/bin/sh
echo \"✅ Self deleting $VM_ID in ${machine_zone} in ${shutdown_timeout} seconds ...\"
# We tear down the machine by starting the systemd service that was registered by the startup script
systemctl start shutdown.service
EOF
# Install driver if this is a deeplearning image is specified
if [ \"${image_project}\" == \"deeplearning-platform-release\" ]; then
sudo /opt/deeplearning/install-driver.sh
fi
# See: https://docs.github.com/en/actions/hosting-your-own-runners/managing-self-hosted-runners/running-scripts-before-or-after-a-job
echo "ACTIONS_RUNNER_HOOK_JOB_COMPLETED=/usr/bin/gce_runner_shutdown.sh" >.env
gcloud compute instances add-labels ${VM_ID} --zone=${machine_zone} --labels=gh_ready=0 && \\
RUNNER_ALLOW_RUNASROOT=1 ./config.sh --url https://github.com/${GITHUB_REPOSITORY} --token ${RUNNER_TOKEN} --labels ${VM_ID} --unattended ${ephemeral_flag} --disableupdate && \\
./svc.sh install && \\
./svc.sh start && \\
gcloud compute instances add-labels ${VM_ID} --zone=${machine_zone} --labels=gh_ready=1
# 3 days represents the max workflow runtime. This will shutdown the instance if everything else fails.
nohup sh -c \"sleep 3d && gcloud --quiet compute instances delete ${VM_ID} --zone=${machine_zone}\" > /dev/null &
"
  }

  # Runs `gcloud compute instances create` for the current machine_zone and
  # returns gcloud's exit status. Strict mode is switched off first so a
  # failed attempt does not abort the script.
  # The *_flag variables and ${accelerator} are deliberately unquoted: an
  # empty one must vanish and ${accelerator} must split into two flags.
  function create_vm {
    echo "🔄 Attempting to create VM in zone: ${machine_zone}"
    safety_off
    gcloud compute instances create ${VM_ID} \
      --zone=${machine_zone} \
      ${disk_size_flag} \
      ${boot_disk_type_flag} \
      --machine-type=${machine_type} \
      --scopes=${scopes} \
      ${service_account_flag} \
      ${image_project_flag} \
      ${image_flag} \
      ${image_family_flag} \
      ${preemptible_flag} \
      ${no_external_address_flag} \
      ${network_flag} \
      ${subnet_flag} \
      ${accelerator} \
      ${maintenance_policy_flag} \
      --labels=gh_ready=0,gh_repo_owner="${gh_repo_owner}",gh_repo="${gh_repo}",gh_run_id="${gh_run_id}" \
      --metadata=startup-script="$startup_script"
  }

  if [[ -z "${accelerator}" ]]; then
    # The startup script must be rendered on this path too; without it the VM
    # boots without a runner and never reports gh_ready=1.
    build_startup_script
    if ! create_vm; then
      echo "❌ Failed to create ${VM_ID} in zone ${machine_zone}" >&2
      exit 1
    fi
  else
    local created="false"
    zones=$(gcloud compute accelerator-types list --verbosity=error --filter="name=${accel_only} AND zone:us-*" --format="value(zone)" | shuf)
    # $zones is intentionally unquoted: one zone per whitespace-separated word.
    for zone in $zones; do
      machine_zone=$zone
      build_startup_script
      if create_vm; then
        created="true"
        break
      fi
    done
    if [[ "${created}" != "true" ]]; then
      echo "❌ Failed to create ${VM_ID} in any zone offering ${accel_only}" >&2
      exit 1
    fi
  fi

  echo "label=${VM_ID}" >> "${GITHUB_OUTPUT}"

  # Poll the gh_ready label: up to count * interval seconds (10 minutes).
  count=60
  interval=10
  seconds=$(( count * interval ))
  minutes=$(( seconds / 60 ))
  i=0
  while (( i++ < count )); do
    GH_READY=$(gcloud compute instances describe ${VM_ID} --zone=${machine_zone} --format='json(labels)' | jq -r .labels.gh_ready)
    if [[ $GH_READY == 1 ]]; then
      break
    fi
    echo "${VM_ID} not ready yet, waiting $interval secs ..."
    sleep $interval
  done
  if [[ $GH_READY == 1 ]]; then
    echo "✅ ${VM_ID} ready ..."
  else
    echo "Waited $minutes minutes for ${VM_ID}, without luck, deleting ${VM_ID} ..."
    gcloud --quiet compute instances delete ${VM_ID} --zone=${machine_zone}
    exit 1
  fi
}
# Entry point: enable the strict shell options, then run the requested
# command. `start` is the only supported value; anything else is reported on
# stderr, followed by the usage line, and the script exits with status 1.
safety_on
if [[ "$command" == "start" ]]; then
  start_vm
else
  echo "Invalid command: \`${command}\`, valid values: start" >&2
  usage
  exit 1
fi