# instant_benchmark.yml
name: instant benchmark tooling
on:
workflow_dispatch:
inputs:
container:
        description: 'The container used to run the benchmark'
required: true
default: '0.23.0-deepspeed'
running_template:
        description: 'A JSON file that contains benchmark plans'
required: true
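      # Illustration only: a hypothetical sketch of a running_template file.
      # The real schema is whatever tests/integration/instant_benchmark.py
      # --parse accepts; the key and field names below are assumptions.
      #   {
      #     "my_benchmark_plan": {
      #       "container": "deepjavalibrary/djl-serving:0.23.0-deepspeed",
      #       "commands": ["echo run benchmark steps here"]
      #     }
      #   }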
instance:
        description: 'The instance type used for the benchmark'
required: true
default: 'g5.12xlarge'
type: choice
options:
- g5.2xlarge
- g5.12xlarge
- g5.48xlarge
- g4dn.12xlarge
- g4dn.2xlarge
- p4d.24xlarge
- inf2.8xlarge
- inf2.24xlarge
- trn1.2xlarge
- trn1.32xlarge
record:
description: 'Whether to record the results'
default: 'none'
type: choice
options:
- none
- table
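# A minimal sketch of dispatching this workflow from the GitHub CLI; the
# input values below are examples only:
#   gh workflow run instant_benchmark.yml \
#     -f container=0.23.0-deepspeed \
#     -f running_template=my_template.json \
#     -f instance=g5.12xlarge \
#     -f record=table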
permissions:
id-token: write
contents: read
jobs:
create-runners:
runs-on: [self-hosted, scheduler]
steps:
- name: Create new instance
id: create_instance
run: |
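          # Request a short-lived self-hosted-runner registration token from
          # the GitHub API, then pass it to start_instance.sh to launch a
          # runner instance labeled for the selected instance type.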
cd /home/ubuntu/djl_benchmark_script/scripts
token=$( curl -X POST -H "Authorization: token ${{ secrets.ACTION_RUNNER_PERSONAL_TOKEN }}" \
https://api.github.com/repos/deepjavalibrary/djl-serving/actions/runners/registration-token \
--fail \
| jq '.token' | tr -d '"' )
./start_instance.sh action_ib_${{ github.event.inputs.instance }} $token djl-serving
outputs:
gpu_instance_id: ${{ steps.create_instance.outputs.action_ib_instance_id }}
environment-setup:
runs-on: [ self-hosted, "${{ github.event.inputs.instance }}" ]
timeout-minutes: 15
needs: [ create-runners ]
steps:
- uses: actions/checkout@v3
- name: Set up Python3
uses: actions/setup-python@v4
with:
python-version: '3.10.x'
- name: Clean env
run: |
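          # Reclaim disk space: remove all unused containers, images, and
          # volumes left over from previous runs.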
yes | docker system prune -a --volumes
- name: Parse job schema
working-directory: tests/integration
id: generate_matrix
run: |
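          # Expand the user-supplied template into a list of benchmark jobs;
          # the jobs and template outputs feed the benchmark_run matrix below.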
python3 instant_benchmark.py --parse ${{ github.event.inputs.running_template }}
- name: Download container
run: |
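          # Pre-pull the image so the benchmark jobs on this runner start
          # from a warm Docker cache.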
docker pull deepjavalibrary/djl-serving:${{ github.event.inputs.container }}
outputs:
jobs: ${{ steps.generate_matrix.outputs.jobs }}
template: ${{ steps.generate_matrix.outputs.template }}
benchmark_run:
runs-on: [ self-hosted, "${{ github.event.inputs.instance }}" ]
timeout-minutes: 30
needs: [ environment-setup ]
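    # Fan out one job per entry in the parsed job list; fromJSON turns the
    # JSON string output back into a sequence the matrix can iterate over.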
strategy:
matrix:
job: ${{ fromJSON(needs.environment-setup.outputs.jobs) }}
steps:
- uses: actions/checkout@v3
- name: Set up Python3
uses: actions/setup-python@v4
with:
python-version: '3.10.x'
      - name: Install dependencies
run: |
sudo apt-get update
sudo apt-get install awscli -y
pip3 install boto3
- name: Setup awscurl
working-directory: tests/integration
run: |
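          # Fetch awscurl, a curl-style client that signs requests with AWS
          # SigV4; presumably used by the generated benchmark script to send
          # inference requests.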
wget https://github.com/frankfliu/junkyard/releases/download/v0.3.1/awscurl
chmod +x awscurl
- name: Run benchmark job
working-directory: tests/integration
run: |
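          # Write the parsed template to disk, generate the benchmark script
          # for this matrix entry, then execute it.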
echo "${{ needs.environment-setup.outputs.template }}" >> template.json
python3 instant_benchmark.py --template template.json \
--job ${{ matrix.job }} --instance ${{ github.event.inputs.instance }} \
--container deepjavalibrary/djl-serving:${{ github.event.inputs.container }}
bash instant_benchmark.sh
- name: Configure AWS Credentials
if: ${{ github.event.inputs.record == 'table' }}
uses: aws-actions/configure-aws-credentials@v4
with:
role-to-assume: arn:aws:iam::185921645874:role/github-actions-djl-serving
aws-region: us-east-1
- name: Record benchmark job
if: ${{ github.event.inputs.record == 'table' }}
working-directory: tests/integration
run: |
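          # Persist the benchmark results; with --record table they are
          # written to a results table (the exact backing store is not
          # specified here).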
python3 record_benchmark.py --template template.json \
--job ${{ matrix.job }} --instance ${{ github.event.inputs.instance }} \
--model models/test \
--container deepjavalibrary/djl-serving:${{ github.event.inputs.container }} --record table
- name: Get serving logs
if: always()
working-directory: tests/integration
run: |
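          # Force-remove any containers that are still running (ignoring
          # errors), then print the serving log into the workflow output.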
docker rm -f $(docker ps -aq) || true
cat logs/serving.log
- name: Upload test artifacts
uses: actions/upload-artifact@v3
if: always()
with:
name: ${{ matrix.job }}
path: tests/integration
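  # Tear-down runs even when earlier jobs fail, so the benchmark instance is
  # always stopped and does not keep accruing cost.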
stop-runners:
if: always()
runs-on: [ self-hosted, scheduler ]
needs: [ create-runners, environment-setup, benchmark_run ]
steps:
- name: Stop instances
run: |
cd /home/ubuntu/djl_benchmark_script/scripts
instance_id=${{ needs.create-runners.outputs.gpu_instance_id }}
./stop_instance.sh $instance_id