-
Notifications
You must be signed in to change notification settings - Fork 803
173 lines (144 loc) · 5.47 KB
/
test.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
# CI workflow: unit and integration tests run on GitHub-hosted macOS runners.
name: Python Tests on M1 Mac

# Trigger on pushes to main and on pull requests that target main.
on:
  push:
    branches: [main]
  pull_request:
    branches: [main]
jobs:
  # Runs the inference-engine test module on the macos-14 runner.
  unit_test:
    runs-on: macos-14
    steps:
      # v2 of checkout/setup-python targets the removed Node 12 runtime;
      # use the current major versions.
      - uses: actions/checkout@v4
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.12'
      - name: Cache huggingface hub models
        uses: actions/cache@v4
        with:
          path: ~/.cache/huggingface/hub
          # NOTE(review): hashFiles() only matches files inside
          # GITHUB_WORKSPACE, so this hash segment is likely empty and the
          # key effectively static - confirm before relying on it for
          # cache invalidation.
          key: ${{ runner.os }}-huggingface-hub-${{ hashFiles('~/.cache/huggingface/hub/**/*') }}-${{ github.job }}
      - name: Install dependencies
        run: |
          python3 -m pip install --upgrade pip
          pip install .
      - name: Run tests
        run: |
          # Check if cached files are present
          ls ~/.cache/huggingface/hub/models--mlx-community--Meta-Llama-3-8B-Instruct-4bit/**/* || true
          # Run unit tests
          METAL_XCODE=1 python3 -m exo.inference.test_inference_engine

  # Starts two local instances with mirrored listen/broadcast ports and
  # checks that each one logs a connection to a peer.
  discovery_integration_test:
    runs-on: macos-latest
    steps:
      - uses: actions/checkout@v4
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.x'
      - name: Install dependencies
        run: |
          python3 -m pip install --upgrade pip
          pip install .
      - name: Run discovery integration test
        run: |
          # Start first instance
          DEBUG_DISCOVERY=9 DEBUG=9 python3 main.py --listen-port 5678 --broadcast-port 5679 --chatgpt-api-port 8000 > output1.log 2>&1 &
          PID1=$!
          # Start second instance
          DEBUG_DISCOVERY=9 DEBUG=9 python3 main.py --listen-port 5679 --broadcast-port 5678 --chatgpt-api-port 8001 > output2.log 2>&1 &
          PID2=$!
          # Wait for discovery
          sleep 10
          # Stop both instances. The default step shell exits on the first
          # failing command, so tolerate an instance that already exited;
          # otherwise the log dump below would never run.
          kill "$PID1" "$PID2" || true
          # Check outputs
          if grep -q "Connected to peer" output1.log && grep -q "Connected to peer" output2.log; then
            echo "Test passed: Both instances discovered each other"
            exit 0
          else
            echo "Test failed: Devices did not discover each other"
            echo "Output of first instance:"
            cat output1.log
            echo "Output of second instance:"
            cat output2.log
            exit 1
          fi

  # Starts two local instances, sends chat-completion requests to each
  # instance's API port and checks both answers for "Michael Jackson".
  chatgpt_api_integration_test:
    runs-on: macos-latest
    steps:
      - uses: actions/checkout@v4
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.x'
      - name: Cache huggingface hub models
        uses: actions/cache@v4
        with:
          path: ~/.cache/huggingface/hub
          # NOTE(review): hashFiles() only matches files inside
          # GITHUB_WORKSPACE, so this hash segment is likely empty -
          # confirm before relying on it for cache invalidation.
          key: ${{ runner.os }}-huggingface-hub-${{ hashFiles('~/.cache/huggingface/hub/**/*') }}-${{ github.job }}
          restore-keys: |
            ${{ runner.os }}-huggingface-hub-
      - name: Cache tinygrad downloaded models
        uses: actions/cache@v4
        with:
          path: ~/Library/Caches/tinygrad/downloads
          # NOTE(review): same hashFiles() caveat as the cache step above.
          key: ${{ runner.os }}-tinygrad-downloads-${{ hashFiles('~/Library/Caches/tinygrad/downloads/**/*') }}-${{ github.job }}
          restore-keys: |
            ${{ runner.os }}-tinygrad-downloads-
      - name: Install dependencies
        run: |
          python3 -m pip install --upgrade pip
          pip install .
      - name: Run chatgpt api integration test
        run: |
          # Check if cached files are present
          ls ~/.cache/huggingface/hub/models--mlx-community--Meta-Llama-3-8B-Instruct-4bit/**/* || true
          # Start first instance
          DEBUG_DISCOVERY=9 DEBUG=9 python3 main.py --listen-port 5678 --broadcast-port 5679 --chatgpt-api-port 8000 --chatgpt-api-response-timeout-secs 900 > output1.log 2>&1 &
          PID1=$!
          # Start second instance
          DEBUG_DISCOVERY=9 DEBUG=9 python3 main.py --listen-port 5679 --broadcast-port 5678 --chatgpt-api-port 8001 --chatgpt-api-response-timeout-secs 900 > output2.log 2>&1 &
          PID2=$!
          # Wait for discovery
          sleep 10
          # First request only loads the model. A curl failure must not
          # abort the step here: the content check at the end decides the
          # result and prints both instance logs on failure.
          curl -s http://localhost:8000/v1/chat/completions \
            -H "Content-Type: application/json" \
            -d '{
              "model": "llama-3-8b",
              "messages": [{"role": "user", "content": "Keep responses concise. Placeholder to load model..."}],
              "temperature": 0.7
            }' || true
          # Ask the first instance
          response_1=$(curl -s http://localhost:8000/v1/chat/completions \
            -H "Content-Type: application/json" \
            -d '{
              "model": "llama-3-8b",
              "messages": [{"role": "user", "content": "Keep responses concise. Who was the king of pop?"}],
              "temperature": 0.7
            }') || true
          echo "Response 1: $response_1"
          # Ask the second instance (API port 8001) so that both nodes are
          # exercised, as the pass message below states.
          response_2=$(curl -s http://localhost:8001/v1/chat/completions \
            -H "Content-Type: application/json" \
            -d '{
              "model": "llama-3-8b",
              "messages": [{"role": "user", "content": "Keep responses concise. Who was the king of pop?"}],
              "temperature": 0.7
            }') || true
          echo "Response 2: $response_2"
          # Stop both instances; tolerate one that already exited so the
          # diagnostics below are still printed.
          kill "$PID1" "$PID2" || true
          echo ""
          if ! echo "$response_1" | grep -q "Michael Jackson" || ! echo "$response_2" | grep -q "Michael Jackson"; then
            echo "Test failed: Response does not contain 'Michael Jackson'"
            echo "Response 1: $response_1"
            echo ""
            echo "Response 2: $response_2"
            echo "Output of first instance:"
            cat output1.log
            echo "Output of second instance:"
            cat output2.log
            exit 1
          else
            echo "Test passed: Response from both nodes contains 'Michael Jackson'"
          fi