Unit Tests #65
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
name: Unit Tests | |
defaults: | |
run: | |
shell: bash -le {0} | |
on: | |
schedule: | |
- cron: '0 19 * * *' | |
repository_dispatch: | |
workflow_dispatch: | |
inputs: | |
repo: | |
description: 'GitHub repo {owner}/{repo}' | |
required: false | |
default: '' | |
ref: | |
description: 'GitHub ref: Branch, Tag or Commit SHA' | |
required: false | |
default: '' | |
test_names: | |
description: 'Input Test(s) to Run (default all)' | |
required: false | |
default: '' | |
artifact_id: | |
description: 'Run id for artifact to be downloaded' | |
required: false | |
default: '' | |
env: | |
CUDA_DEVICE_ORDER: PCI_BUS_ID | |
CUDA_VISIBLE_DEVICES: 0 | |
ZEN3_SERVER: 10.0.13.0 # set to a wrong ip to keep this feature but not use it | |
ZEN4_SERVER: 10.0.14.199 | |
CPU_TEST_FILES: "test_qbits.py" | |
EXCLUDED_TEST_FILES: "test_tgi.py" | |
repo: ${{ github.event.inputs.repo || github.repository }} | |
ref: ${{ github.event.inputs.ref || github.ref }} | |
concurrency: | |
group: ${{ github.ref }}-workflow-unit-tests | |
cancel-in-progress: true | |
jobs: | |
check-vm: | |
runs-on: self-hosted | |
container: | |
image: modelcloud/gptqmodel:alpine-ci-v1 | |
outputs: | |
ip: ${{ steps.get_ip.outputs.ip }} | |
tag: ${{ steps.get_ip.outputs.tag }} | |
run_id: ${{ steps.get_ip.outputs.run_id }} | |
steps: | |
- name: Print env | |
run: | | |
echo "repo: ${{ env.repo }}" | |
echo "ref: ${{ env.ref }}" | |
- name: Select server | |
id: get_ip | |
run: | | |
if [[ "${{ runner.name }}" == *"zen4"* ]]; then | |
echo "current ci is zen4" | |
response=0 | |
else | |
echo "test zen4 vm status" | |
response=$(curl --silent --fail --max-time 5 http://$ZEN4_SERVER/gpu/runner/status/zen4) || response=error | |
if [ "$response" == "error" ]; then | |
echo "test zen3 vm status" | |
response=$(curl --silent --fail --max-time 5 http://${ZEN3_SERVER}/gpu/runner/status/zen4) || response=error | |
fi | |
fi | |
echo "response: $response" | |
if [ "$response" == "0" ]; then | |
tag="zen4" | |
elif [ "$response" == "-1" ]; then | |
tag="zen3" | |
else | |
echo "Error: Unexpected result - $response" | |
exit 1 | |
fi | |
echo "Runner tag: $tag" | |
response=$(curl -s --head --fail --max-time 5 http://${ZEN4_SERVER}/gpu/status) || response=error | |
if echo "$response" | grep "200 OK" > /dev/null; then | |
echo "Zen4 server is online. set ip to $ip" | |
ip=${ZEN4_SERVER} | |
else | |
response=$(curl -s --head --max-time 5 http://${ZEN3_SERVER}/gpu/status) || response=error | |
if echo "$response" | grep "200 OK" > /dev/null; then | |
ip=${ZEN3_SERVER} | |
echo "ZEN3 server is online. set ip to $ip" | |
else | |
echo "ZEN3 server is offline." | |
exit 1 | |
fi | |
fi | |
echo "ip=$ip" >> "$GITHUB_OUTPUT" | |
echo "tag=$tag" >> "$GITHUB_OUTPUT" | |
echo "GPU_IP=$ip" >> $GITHUB_ENV | |
echo "tag: $tag, ip: $ip" | |
if [ -n "${{ github.event.inputs.artifact_id }}" ]; then | |
run_id="${{ github.event.inputs.artifact_id }}" | |
else | |
run_id="${{ github.run_id }}" | |
fi | |
echo "run_id=$run_id" >> "$GITHUB_OUTPUT" | |
echo "artifact_id=$run_id" | |
build-zen4: | |
runs-on: [self-hosted, zen4] | |
needs: check-vm | |
if: needs.check-vm.outputs.tag == 'zen4' && github.event.inputs.artifact_id == '' | |
container: | |
image: ${{ needs.check-vm.outputs.ip }}:5000/modelcloud/gptqmodel:github-ci-v2 | |
steps: | |
- name: Print Env | |
run: | | |
echo "== pyenv ==" | |
pyenv versions | |
echo "== python ==" | |
python --version | |
echo "== nvcc ==" | |
#nvcc --version | |
echo "== torch ==" | |
pip show torch | |
- name: Checkout Codes | |
uses: actions/checkout@v4 | |
with: | |
repository: ${{ env.repo }} | |
ref: ${{ env.ref }} | |
- name: Install requirements | |
run: | | |
# cuda_version=$(echo ${{ matrix.tag }} | grep -oP 'cuda\K[0-9.]+') | |
# torch_version=$(echo ${{ matrix.tag }} | grep -oP 'torch\K[0-9.]+') | |
# python_version=$(echo ${{ matrix.tag }} | grep -oP 'python\K[0-9.]+') | |
/bin/bash /tmp/init/init_env.sh 12.4 2.4.1 3.11 | |
pip install transformers -U | |
- name: Compile | |
timeout-minutes: 35 | |
run: python setup.py bdist_wheel | |
- name: Test install | |
run: | | |
ls -ahl dist | |
whl=$(ls -t dist/*.whl | head -n 1 | xargs basename) | |
sha256=$(sha256sum dist/$whl) | |
echo "hash=$sha256" | |
echo "WHL_HASH=$sha256" >> $GITHUB_ENV | |
echo "WHL_NAME=$whl" >> $GITHUB_ENV | |
twine check dist/$whl | |
pip install dist/$whl | |
- name: Upload wheel | |
continue-on-error: true | |
run: | | |
curl -s -F "runid=${{ github.run_id }}" -F "repo=${{ env.repo }}" -F "ref=${{ env.ref }}" -F "sha256=${{ env.WHL_HASH }}" -F "file=@dist/${{ env.WHL_NAME }}" http://${{ needs.check-vm.outputs.ip }}/gpu/whl/upload | |
- name: Upload to artifact | |
uses: actions/upload-artifact@v4 | |
with: | |
name: dist | |
path: dist | |
build-zen3: | |
runs-on: [self-hosted, zen3] | |
needs: check-vm | |
if: needs.check-vm.outputs.tag == 'zen3' && github.event.inputs.artifact_id == '' | |
container: | |
image: ${{ needs.check-vm.outputs.ip }}:5000/modelcloud/gptqmodel:github-ci-v2 | |
steps: | |
- name: Print Env | |
run: | | |
echo "== pyenv ==" | |
pyenv versions | |
echo "== python ==" | |
python --version | |
echo "== nvcc ==" | |
#nvcc --version | |
echo "== torch ==" | |
pip show torch | |
- name: Checkout Codes | |
uses: actions/checkout@v4 | |
with: | |
repository: ${{ env.repo }} | |
ref: ${{ env.ref }} | |
- name: Install requirements | |
run: | | |
# cuda_version=$(echo ${{ matrix.tag }} | grep -oP 'cuda\K[0-9.]+') | |
# torch_version=$(echo ${{ matrix.tag }} | grep -oP 'torch\K[0-9.]+') | |
# python_version=$(echo ${{ matrix.tag }} | grep -oP 'python\K[0-9.]+') | |
/bin/bash /tmp/init/init_env.sh 12.4 2.4.1 3.11 | |
pip install transformers -U | |
- name: Compile | |
timeout-minutes: 35 | |
run: python setup.py bdist_wheel | |
- name: Test install | |
run: | | |
ls -ahl dist | |
whl=$(ls -t dist/*.whl | head -n 1 | xargs basename) | |
sha256=$(sha256sum dist/$whl) | |
echo "hash=$sha256" | |
echo "WHL_HASH=$sha256" >> $GITHUB_ENV | |
echo "WHL_NAME=$whl" >> $GITHUB_ENV | |
twine check dist/$whl | |
pip install dist/$whl | |
- name: Upload wheel | |
continue-on-error: true | |
run: | | |
curl -s -F "runid=${{ github.run_id }}" -F "repo=${{ env.repo }}" -F "ref=${{ env.ref }}" -F "sha256=${{ env.WHL_HASH }}" -F "file=@dist/${{ env.WHL_NAME }}" http://${{ needs.check-vm.outputs.ip }}/gpu/whl/upload | |
- name: Upload to artifact | |
uses: actions/upload-artifact@v4 | |
with: | |
name: dist | |
path: dist | |
list-test-files: | |
runs-on: ubuntu-latest | |
outputs: | |
cpu-files: ${{ steps.files.outputs.cpu-files }} | |
gpu-files: ${{ steps.files.outputs.gpu-files }} | |
steps: | |
- name: Checkout Codes | |
uses: actions/checkout@v4 | |
with: | |
repository: ${{ env.repo }} | |
ref: ${{ env.ref }} | |
- name: List files | |
id: files | |
run: | | |
script=" | |
import json | |
import os | |
cpu_file_list = [f.strip().removesuffix('.py') for f in '${CPU_TEST_FILES}'.split(',') if f.strip()] | |
test_files_list = [f.strip().removesuffix('.py') for f in '${{ github.event.inputs.test_names }}'.split(',') if f.strip()] | |
cpu_test_files = [f for f in cpu_file_list if not test_files_list or f in test_files_list] | |
all_tests = [f.removesuffix('.py') for f in os.listdir('tests/') if f.startswith('test_') and f.endswith('.py')] | |
all_tests = [item for item in all_tests if item.strip().removesuffix('.py') not in '${CPU_TEST_FILES}'] | |
gpu_test_files = [f for f in all_tests if f not in cpu_file_list and (not test_files_list or f in test_files_list)] | |
print(f'{json.dumps(cpu_test_files)}|{json.dumps(gpu_test_files)}') | |
" | |
test_files=$(python3 -c "$script") | |
IFS='|' read -r cpu_test_files gpu_test_files <<< "$test_files" | |
echo "cpu-files=$cpu_test_files" >> "$GITHUB_OUTPUT" | |
echo "gpu-files=$gpu_test_files" >> "$GITHUB_OUTPUT" | |
echo "Test files: $test_files" | |
echo "CPU Test files: $cpu_test_files" | |
echo "GPU Test files: $gpu_test_files" | |
test_gpu: | |
needs: | |
- build-zen4 | |
- build-zen3 | |
- list-test-files | |
- check-vm | |
runs-on: self-hosted | |
if: always() && !cancelled() && (needs.build-zen4.result == 'success' || needs.build-zen3.result == 'success' || github.event.inputs.artifact_id != '') | |
timeout-minutes: 40 | |
container: | |
image: ${{ needs.check-vm.outputs.ip }}:5000/modelcloud/gptqmodel:github-ci-v2 | |
strategy: | |
fail-fast: false | |
max-parallel: 6 | |
matrix: | |
test_script: ${{ fromJSON(needs.list-test-files.outputs.gpu-files) }} | |
steps: | |
- name: Print Env | |
run: | | |
echo "== pyenv ==" | |
pyenv versions | |
echo "== python ==" | |
python --version | |
echo "== nvcc ==" | |
#nvcc --version | |
echo "== torch ==" | |
pip show torch | |
if [ -n "${{ github.event.inputs.artifact_id }}" ]; then | |
run_id="${{ github.event.inputs.artifact_id }}" | |
else | |
run_id="${{ github.run_id }}" | |
fi | |
echo "RUN_ID=$run_id" >> $GITHUB_ENV | |
echo "RUN_ID=${run_id}" | |
- name: Checkout Codes | |
uses: actions/checkout@v4 | |
with: | |
repository: ${{ github.event.inputs.repo }} | |
ref: ${{ github.event.inputs.ref }} | |
- name: Download wheel | |
continue-on-error: true | |
run: | | |
file_name=$(curl -s -F "runid=${{ needs.check-vm.outputs.run_id }}" -F "repo=${{ env.repo }}" -F "ref=${{ env.ref }}" -F "fuzz=1" "http://${{ needs.check-vm.outputs.ip }}/gpu/whl/download") | |
if echo "$file_name" | grep -q "gptqmodel"; then | |
mkdir dist || true | |
cd dist | |
curl -s -O http://${{ needs.check-vm.outputs.ip }}/whl/${{ env.repo }}/${{ needs.check-vm.outputs.run_id }}/$file_name | |
ls -ahl . | |
sha256=$(sha256sum $file_name) | |
echo "sha256=$sha256" | |
echo "DOWNLOADED=1" >> $GITHUB_ENV | |
fi | |
- name: Download artifact | |
if: env.DOWNLOADED == '' | |
uses: actions/download-artifact@v4 | |
with: | |
name: dist | |
path: dist | |
run-id: ${{ needs.check-vm.outputs.run_id }} | |
- name: Install wheel | |
run: | | |
pip install bitblas parameterized uvicorn -i http://${{ needs.check-vm.outputs.ip }}/simple/ --trusted-host ${{ needs.check-vm.outputs.ip }} | |
pip install dist/*.whl | |
- name: Check platform | |
run: | | |
if [[ "${{ runner.name }}" == *"zen4"* ]]; then | |
ip=${ZEN4_SERVER} | |
else | |
ip=${ZEN3_SERVER} | |
fi | |
echo "GPU_IP=$ip" >> $GITHUB_ENV | |
- name: Find suitable GPU | |
run: | | |
timestamp=$(date +%s%3N) | |
gpu_id=-1 | |
while [ "$gpu_id" -lt 0 ]; do | |
gpu_id=$(curl -s "http://${{ env.GPU_IP }}/gpu/get?id=${{ github.run_id }}×tamp=$timestamp") | |
if [ "$gpu_id" -lt 0 ]; then | |
echo "http://${{ env.GPU_IP }}/gpu/get?id=${{ github.run_id }}×tamp=$timestamp returned $gpu_id" | |
echo "No available GPU, waiting 5 seconds..." | |
sleep 5 | |
else | |
echo "Allocated GPU ID: $gpu_id" | |
fi | |
done | |
echo "CUDA_VISIBLE_DEVICES=$gpu_id" >> $GITHUB_ENV | |
echo "STEP_TIMESTAMP=$timestamp" >> $GITHUB_ENV | |
echo "CUDA_VISIBLE_DEVICES set to $gpu_id, timestamp=$timestamp" | |
- name: Install requirements | |
run: | | |
# cuda_version=$(echo ${{ matrix.tag }} | grep -oP 'cuda\K[0-9.]+') | |
# torch_version=$(echo ${{ matrix.tag }} | grep -oP 'torch\K[0-9.]+') | |
# python_version=$(echo ${{ matrix.tag }} | grep -oP 'python\K[0-9.]+') | |
/bin/bash /tmp/init/init_env.sh 12.4 2.4.1 3.11 | |
pip install transformers -U | |
- name: Run tests | |
if: ${{ !github.event.inputs.test_names || contains(github.event.inputs.test_names, matrix.test_script) }} | |
run: pytest --durations=0 tests/${{ matrix.test_script }}.py | |
- name: Release GPU | |
if: always() | |
run: curl -X GET "http://${{ env.GPU_IP }}/gpu/release?id=${{ github.run_id }}&gpu=${{ env.CUDA_VISIBLE_DEVICES }}×tamp=${{ env.STEP_TIMESTAMP }}" | |
test_cpu: | |
needs: | |
- build-zen4 | |
- build-zen3 | |
- list-test-files | |
- check-vm | |
runs-on: self-hosted | |
if: always() && !cancelled() && (needs.build-zen4.result == 'success' || needs.build-zen3.result == 'success' || github.event.inputs.artifact_id != '') && needs.list-test-files.outputs.cpu-files != '[]' | |
timeout-minutes: 40 | |
container: | |
image: ${{ needs.check-vm.outputs.ip }}:5000/modelcloud/gptqmodel:github-ci-v2 | |
strategy: | |
fail-fast: false | |
matrix: | |
test_script: ${{ fromJSON(needs.list-test-files.outputs.cpu-files) }} | |
steps: | |
- name: Print Env | |
run: | | |
echo "== pyenv ==" | |
pyenv versions | |
echo "== python ==" | |
python --version | |
echo "== nvcc ==" | |
#nvcc --version | |
echo "== torch ==" | |
pip show torch | |
- name: Checkout Codes | |
uses: actions/checkout@v4 | |
with: | |
repository: ${{ env.repo }} | |
ref: ${{ env.ref }} | |
- name: Download wheel | |
continue-on-error: true | |
run: | | |
file_name=$(curl -s -F "runid=${{ needs.check-vm.outputs.run_id }}" -F "repo=${{ env.repo }}" -F "ref=${{ env.ref }}" -F "fuzz=1" "http://${{ needs.check-vm.outputs.ip }}/gpu/whl/download") | |
if echo "$file_name" | grep -q "gptqmodel"; then | |
mkdir dist || true | |
cd dist | |
curl -s -O http://${{ needs.check-vm.outputs.ip }}/whl/${{ env.repo }}/${{ needs.check-vm.outputs.run_id }}/$file_name | |
ls -ahl . | |
sha256=$(sha256sum $file_name) | |
echo "sha256=$sha256" | |
echo "DOWNLOADED=1" >> $GITHUB_ENV | |
fi | |
- name: Download artifact | |
if: env.DOWNLOADED == '' | |
uses: actions/download-artifact@v4 | |
with: | |
name: dist | |
path: dist | |
run-id: ${{ needs.check-vm.outputs.run_id }} | |
- name: Install wheel | |
run: pip install dist/*.whl -i http://${{ needs.check-vm.outputs.ip }}/simple/ --trusted-host ${{ needs.check-vm.outputs.ip }} | |
- name: Run tests | |
if: ${{ !github.event.inputs.test_names || contains(github.event.inputs.test_names, matrix.test_script) }} | |
run: pytest --durations=0 tests/${{ matrix.test_script }}.py |