# Skip to content
#
# Unit Tests
#
# Unit Tests #45
#
# Workflow file for this run

# CI workflow: build the wheel on a self-hosted runner, then run the test
# files found under tests/ as a job matrix.
name: Unit Tests

on:
  schedule:
    # Daily at 19:00 (GitHub evaluates schedule crons in UTC).
    - cron: "0 19 * * *"
  repository_dispatch:
  workflow_dispatch:
    inputs:
      repo:
        description: "GitHub repo {owner}/{repo}"
        required: false
        default: ""
      ref:
        description: "GitHub ref: Branch, Tag or Commit SHA"
        required: false
        default: ""
      test_names:
        description: "Input Test(s) to Run (default all)"
        required: false
        default: ""
      artifact_id:
        description: "Run id for artifact to be downloaded"
        required: false
        default: ""

# One active run per ref: a newer run cancels the one still in progress.
concurrency:
  group: ${{ github.ref }}-workflow-unit-tests
  cancel-in-progress: true

# Every `run:` step executes in a login bash shell with errexit (-l, -e).
defaults:
  run:
    shell: bash -le {0}

env:
  CUDA_DEVICE_ORDER: PCI_BUS_ID
  # Workflow-wide default; test_gpu overwrites it with the allocated GPU id.
  CUDA_VISIBLE_DEVICES: "0"
  AMD_SERVER: "10.0.13.31"
  INTEL_SERVER: "10.0.23.35"
  # Comma-separated test files routed to test_cpu instead of test_gpu.
  CPU_TEST_FILES: 'test_qbits.py'
  # Manual-dispatch inputs win; otherwise fall back to the triggering repo/ref.
  repo: ${{ github.event.inputs.repo || github.repository }}
  ref: ${{ github.event.inputs.ref || github.ref }}
jobs:
check-vm:
  # Decides which runner family (intel/amd) builds the wheel and which server
  # IP the later jobs use for the container registry, pip index and wheel
  # upload/download. Also resolves the run id whose wheel should be tested.
  runs-on: self-hosted
  container:
    image: modelcloud/gptqmodel:alpine-ci-v1
  outputs:
    ip: ${{ steps.get_ip.outputs.ip }}
    tag: ${{ steps.get_ip.outputs.tag }}
    run_id: ${{ steps.get_ip.outputs.run_id }}
  steps:
    - name: Print env
      run: |
        echo "repo: ${{ env.repo }}"
        echo "ref: ${{ env.ref }}"

    - name: Select server
      id: get_ip
      run: |
        # --- 1. Pick the runner tag -------------------------------------
        # A runner whose name contains "intel" short-circuits to the intel
        # tag; otherwise ask the status endpoint (intel host first, then amd).
        if [[ "${{ runner.name }}" == *"intel"* ]]; then
          echo "current ci is intel"
          response=0
        else
          echo "test intel vm status"
          response=$(curl --silent --fail --max-time 5 http://$INTEL_SERVER/gpu/runner/status/intel) || response=error
          if [ "$response" == "error" ]; then
            echo "test amd vm status"
            # NOTE(review): the AMD fallback also queries .../status/intel;
            # presumably either host can report the intel runner's state - confirm.
            response=$(curl --silent --fail --max-time 5 http://${AMD_SERVER}/gpu/runner/status/intel) || response=error
          fi
        fi

        echo "response: $response"
        # "0" selects the intel runners, "-1" selects the amd runners;
        # anything else (including "error") aborts the workflow.
        if [ "$response" == "0" ]; then
          tag="intel"
        elif [ "$response" == "-1" ]; then
          tag="amd"
        else
          echo "Error: Unexpected result - $response"
          exit 1
        fi
        echo "Runner tag: $tag"

        # --- 2. Pick the server IP --------------------------------------
        # Prefer the intel server; fall back to amd; fail if neither answers
        # the HEAD request with "200 OK".
        response=$(curl -s --head --fail --max-time 5 http://${INTEL_SERVER}/gpu/status) || response=error
        if echo "$response" | grep "200 OK" > /dev/null; then
          # Assign before logging so the message shows the chosen address
          # (previously the echo ran first and printed an empty $ip).
          ip=${INTEL_SERVER}
          echo "Intel server is online. set ip to $ip"
        else
          response=$(curl -s --head --max-time 5 http://${AMD_SERVER}/gpu/status) || response=error
          if echo "$response" | grep "200 OK" > /dev/null; then
            ip=${AMD_SERVER}
            echo "AMD server is online. set ip to $ip"
          else
            echo "AMD server is offline."
            exit 1
          fi
        fi

        echo "ip=$ip" >> "$GITHUB_OUTPUT"
        echo "tag=$tag" >> "$GITHUB_OUTPUT"
        echo "GPU_IP=$ip" >> "$GITHUB_ENV"
        echo "tag: $tag, ip: $ip"

        # --- 3. Resolve the run id of the wheel to test -----------------
        # A manually supplied artifact_id reuses a previous run's wheel;
        # otherwise the wheel comes from this run.
        if [ -n "${{ github.event.inputs.artifact_id }}" ]; then
          run_id="${{ github.event.inputs.artifact_id }}"
        else
          run_id="${{ github.run_id }}"
        fi
        echo "run_id=$run_id" >> "$GITHUB_OUTPUT"
        echo "artifact_id=$run_id"
build-intel:
  # Builds the wheel on an intel-tagged self-hosted runner. Skipped when
  # check-vm selected amd or when an existing artifact_id was supplied.
  runs-on: [self-hosted, intel]
  needs: check-vm
  if: needs.check-vm.outputs.tag == 'intel' && github.event.inputs.artifact_id == ''
  container:
    image: ${{ needs.check-vm.outputs.ip }}:5000/modelcloud/gptqmodel:github-ci-v2
  steps:
    - name: Print Env
      run: |
        echo "== pyenv =="
        pyenv versions
        echo "== python =="
        python --version
        echo "== nvcc =="
        nvcc --version
        echo "== torch =="
        pip show torch

    - name: Checkout Codes
      uses: actions/checkout@v4
      with:
        repository: ${{ env.repo }}
        ref: ${{ env.ref }}

    - name: Install requirements
      run: |
        pip install -r requirements.txt -i http://${{ needs.check-vm.outputs.ip }}/simple/ --trusted-host ${{ needs.check-vm.outputs.ip }}
        # `twine check` runs in "Test install" below; install it explicitly,
        # as build-amd does on the same image, instead of relying on the image.
        pip install twine -i http://${{ needs.check-vm.outputs.ip }}/simple/ --trusted-host ${{ needs.check-vm.outputs.ip }}

    - name: Compile
      timeout-minutes: 35
      run: python setup.py bdist_wheel

    - name: Test install
      run: |
        ls -ahl dist
        # Newest wheel in dist/, file name only.
        whl=$(ls -t dist/*.whl | head -n 1 | xargs basename)
        # NOTE(review): sha256sum prints "<digest>  <path>", so WHL_HASH holds
        # both fields - confirm the upload endpoint expects that format.
        sha256=$(sha256sum dist/$whl)
        echo "hash=$sha256"
        echo "WHL_HASH=$sha256" >> "$GITHUB_ENV"
        echo "WHL_NAME=$whl" >> "$GITHUB_ENV"
        twine check dist/$whl
        pip install dist/$whl

    # Best effort: test jobs fall back to the GitHub artifact uploaded below
    # when the wheel cannot be fetched from the server.
    - name: Upload wheel
      continue-on-error: true
      run: |
        curl -s -F "runid=${{ github.run_id }}" -F "repo=${{ env.repo }}" -F "ref=${{ env.ref }}" -F "sha256=${{ env.WHL_HASH }}" -F "file=@dist/${{ env.WHL_NAME }}" http://${{ needs.check-vm.outputs.ip }}/gpu/whl/upload

    - name: Upload to artifact
      uses: actions/upload-artifact@v4
      with:
        name: dist
        path: dist
build-amd:
  # AMD-runner twin of the intel build: produces the wheel when check-vm
  # picked the amd tag and no pre-built artifact_id was requested.
  needs: check-vm
  if: needs.check-vm.outputs.tag == 'amd' && github.event.inputs.artifact_id == ''
  runs-on:
    - self-hosted
    - amd
  container:
    image: ${{ needs.check-vm.outputs.ip }}:5000/modelcloud/gptqmodel:github-ci-v2
  steps:
    # Toolchain versions, logged for debugging.
    - name: Print Env
      run: |
        echo "== pyenv =="
        pyenv versions
        echo "== python =="
        python --version
        echo "== nvcc =="
        nvcc --version
        echo "== torch =="
        pip show torch

    - name: Checkout Codes
      uses: actions/checkout@v4
      with:
        repository: ${{ env.repo }}
        ref: ${{ env.ref }}

    # Dependencies come from the pip index served by the selected server.
    - name: Install requirements
      run: |
        index_host="${{ needs.check-vm.outputs.ip }}"
        pip install -r requirements.txt -i "http://${index_host}/simple/" --trusted-host "${index_host}"
        pip install twine -i "http://${index_host}/simple/" --trusted-host "${index_host}"

    - name: Compile
      timeout-minutes: 35
      run: python setup.py bdist_wheel

    # Validate the freshly built wheel and export its name/hash for upload.
    - name: Test install
      run: |
        ls -ahl dist
        wheel_name=$(ls -t dist/*.whl | head -n 1 | xargs basename)
        wheel_sum=$(sha256sum dist/$wheel_name)
        echo "hash=$wheel_sum"
        echo "WHL_HASH=$wheel_sum" >> $GITHUB_ENV
        echo "WHL_NAME=$wheel_name" >> $GITHUB_ENV
        twine check dist/$wheel_name
        pip install dist/$wheel_name

    # Failure here is tolerated (continue-on-error); the artifact upload
    # below still publishes dist/.
    - name: Upload wheel
      continue-on-error: true
      run: |
        curl -s \
          -F "runid=${{ github.run_id }}" \
          -F "repo=${{ env.repo }}" \
          -F "ref=${{ env.ref }}" \
          -F "sha256=${{ env.WHL_HASH }}" \
          -F "file=@dist/${{ env.WHL_NAME }}" \
          http://${{ needs.check-vm.outputs.ip }}/gpu/whl/upload

    - name: Upload to artifact
      uses: actions/upload-artifact@v4
      with:
        name: dist
        path: dist
list-test-files:
  # Builds the two test matrices (JSON arrays of test module names without
  # the .py suffix): files listed in CPU_TEST_FILES go to test_cpu, every
  # other tests/test_*.py goes to test_gpu. A non-empty test_names input
  # restricts both lists to the named tests.
  runs-on: ubuntu-latest
  outputs:
    cpu-files: ${{ steps.files.outputs.cpu-files }}
    gpu-files: ${{ steps.files.outputs.gpu-files }}
  steps:
    - name: Checkout Codes
      uses: actions/checkout@v4
      with:
        repository: ${{ env.repo }}
        ref: ${{ env.ref }}

    - name: List files
      id: files
      env:
        # Passed through the environment rather than spliced into the script
        # text, so quotes or shell metacharacters in the input cannot alter
        # the commands that run.
        TEST_NAMES: ${{ github.event.inputs.test_names }}
      run: |
        # Quoted heredoc: the shell performs no expansion inside the Python
        # source; all inputs are read from os.environ.
        test_files=$(python3 - <<'PY'
        import json
        import os


        def module_names(raw):
            """Split a comma-separated list and drop blanks and '.py' suffixes."""
            return [f.strip().removesuffix('.py') for f in raw.split(',') if f.strip()]


        cpu_file_list = module_names(os.environ.get('CPU_TEST_FILES', ''))
        test_files_list = module_names(os.environ.get('TEST_NAMES', ''))

        cpu_test_files = [f for f in cpu_file_list if not test_files_list or f in test_files_list]

        # os.listdir returns entries in arbitrary order; sort so the matrix
        # is stable from run to run.
        all_tests = sorted(
            f.removesuffix('.py')
            for f in os.listdir('tests/')
            if f.startswith('test_') and f.endswith('.py')
        )
        gpu_test_files = [
            f for f in all_tests
            if f not in cpu_file_list and (not test_files_list or f in test_files_list)
        ]

        print(f'{json.dumps(cpu_test_files)}|{json.dumps(gpu_test_files)}')
        PY
        )

        # The script prints "<cpu json>|<gpu json>" on a single line.
        IFS='|' read -r cpu_test_files gpu_test_files <<< "$test_files"
        echo "cpu-files=$cpu_test_files" >> "$GITHUB_OUTPUT"
        echo "gpu-files=$gpu_test_files" >> "$GITHUB_OUTPUT"
        echo "Test files: $test_files"
        echo "CPU Test files: $cpu_test_files"
        echo "GPU Test files: $gpu_test_files"
test_gpu:
  # Runs each GPU test file as its own matrix job (at most 6 at a time).
  # Each job installs the built wheel, reserves a GPU from the allocation
  # service, runs pytest on one file, and always releases the GPU afterwards.
  needs:
    - build-intel
    - build-amd
    - list-test-files
    - check-vm
  runs-on: self-hosted
  # Only one of the two build jobs runs, so `always()` keeps this job from
  # being skipped along with the other; it proceeds when either build
  # succeeded or a pre-built artifact_id was supplied.
  if: >-
    always() && !cancelled() &&
    (needs.build-intel.result == 'success' || needs.build-amd.result == 'success' || github.event.inputs.artifact_id != '')
  timeout-minutes: 40
  container:
    image: ${{ needs.check-vm.outputs.ip }}:5000/modelcloud/gptqmodel:github-ci-v2
  strategy:
    fail-fast: false
    max-parallel: 6
    matrix:
      test_script: ${{ fromJSON(needs.list-test-files.outputs.gpu-files) }}
  steps:
    - name: Print Env
      run: |
        echo "== pyenv =="
        pyenv versions
        echo "== python =="
        python --version
        echo "== nvcc =="
        nvcc --version
        echo "== torch =="
        pip show torch
        if [ -n "${{ github.event.inputs.artifact_id }}" ]; then
          run_id="${{ github.event.inputs.artifact_id }}"
        else
          run_id="${{ github.run_id }}"
        fi
        echo "RUN_ID=$run_id" >> "$GITHUB_ENV"
        echo "RUN_ID=${run_id}"

    - name: Checkout Codes
      uses: actions/checkout@v4
      with:
        # Use the workflow-level repo/ref (inputs with fallback to the
        # triggering repository/ref), matching every other job, instead of
        # the raw inputs, which are empty on schedule/repository_dispatch.
        repository: ${{ env.repo }}
        ref: ${{ env.ref }}

    # First choice: fetch the wheel from the build server. Failure is
    # tolerated; DOWNLOADED stays unset and the artifact step takes over.
    - name: Download wheel
      continue-on-error: true
      run: |
        file_name=$(curl -s -F "runid=${{ needs.check-vm.outputs.run_id }}" -F "repo=${{ env.repo }}" -F "ref=${{ env.ref }}" -F "fuzz=1" "http://${{ needs.check-vm.outputs.ip }}/gpu/whl/download")
        if echo "$file_name" | grep -q "gptqmodel"; then
          mkdir dist || true
          cd dist
          curl -s -O http://${{ needs.check-vm.outputs.ip }}/whl/${{ env.repo }}/${{ needs.check-vm.outputs.run_id }}/$file_name
          ls -ahl .
          sha256=$(sha256sum $file_name)
          echo "sha256=$sha256"
          echo "DOWNLOADED=1" >> "$GITHUB_ENV"
        fi

    - name: Download artifact
      if: env.DOWNLOADED == ''
      uses: actions/download-artifact@v4
      with:
        name: dist
        path: dist
        run-id: ${{ needs.check-vm.outputs.run_id }}

    - name: Install wheel
      run: |
        pip install bitblas parameterized uvicorn -i http://${{ needs.check-vm.outputs.ip }}/simple/ --trusted-host ${{ needs.check-vm.outputs.ip }}
        pip install dist/*.whl

    # The GPU allocation service is addressed on the server matching the
    # runner this job landed on, not the one check-vm selected.
    - name: Check platform
      run: |
        if [[ "${{ runner.name }}" == *"intel"* ]]; then
          ip=${INTEL_SERVER}
        else
          ip=${AMD_SERVER}
        fi
        echo "GPU_IP=$ip" >> "$GITHUB_ENV"

    - name: Find suitable GPU
      run: |
        # The timestamp is sent with the allocation request and again on
        # release (STEP_TIMESTAMP).
        timestamp=$(date +%s%3N)
        gpu_id=-1
        # A negative id means no GPU is free yet: poll every 5 seconds
        # (bounded by the job's timeout-minutes).
        while [ "$gpu_id" -lt 0 ]; do
          gpu_id=$(curl -s "http://${{ env.GPU_IP }}/gpu/get?id=${{ github.run_id }}&timestamp=$timestamp")
          # Fail fast on a non-integer reply (empty body, error page, ...)
          # instead of letting the integer test error out and writing the
          # garbage into CUDA_VISIBLE_DEVICES.
          if ! [[ "$gpu_id" =~ ^-?[0-9]+$ ]]; then
            echo "Error: unexpected response from GPU allocator - $gpu_id"
            exit 1
          fi
          if [ "$gpu_id" -lt 0 ]; then
            echo "http://${{ env.GPU_IP }}/gpu/get?id=${{ github.run_id }}&timestamp=$timestamp returned $gpu_id"
            echo "No available GPU, waiting 5 seconds..."
            sleep 5
          else
            echo "Allocated GPU ID: $gpu_id"
          fi
        done
        echo "CUDA_VISIBLE_DEVICES=$gpu_id" >> "$GITHUB_ENV"
        echo "STEP_TIMESTAMP=$timestamp" >> "$GITHUB_ENV"
        echo "CUDA_VISIBLE_DEVICES set to $gpu_id, timestamp=$timestamp"

    - name: Install requirements
      run: pip install optimum

    - name: Run tests
      if: ${{ !github.event.inputs.test_names || contains(github.event.inputs.test_names, matrix.test_script) }}
      run: pytest --durations=0 tests/${{ matrix.test_script }}.py

    # Runs even when earlier steps failed so the reservation is returned.
    - name: Release GPU
      if: always()
      run: curl -X GET "http://${{ env.GPU_IP }}/gpu/release?id=${{ github.run_id }}&gpu=${{ env.CUDA_VISIBLE_DEVICES }}&timestamp=${{ env.STEP_TIMESTAMP }}"
test_cpu:
  # Runs each CPU-only test file (those named in CPU_TEST_FILES) as its own
  # matrix job. Skipped when the CPU file list is empty.
  needs:
    - build-intel
    - build-amd
    - list-test-files
    - check-vm
  runs-on: self-hosted
  # Same gating as test_gpu (either build succeeded, or a pre-built
  # artifact_id was supplied), plus a non-empty CPU matrix.
  if: >-
    always() && !cancelled() &&
    (needs.build-intel.result == 'success' || needs.build-amd.result == 'success' || github.event.inputs.artifact_id != '') &&
    needs.list-test-files.outputs.cpu-files != '[]'
  timeout-minutes: 40
  container:
    image: ${{ needs.check-vm.outputs.ip }}:5000/modelcloud/gptqmodel:github-ci-v2
  strategy:
    fail-fast: false
    matrix:
      test_script: ${{ fromJSON(needs.list-test-files.outputs.cpu-files) }}
  steps:
    - name: Print Env
      run: |
        echo "== pyenv =="
        pyenv versions
        echo "== python =="
        python --version
        echo "== nvcc =="
        nvcc --version
        echo "== torch =="
        pip show torch

    - name: Checkout Codes
      uses: actions/checkout@v4
      with:
        repository: ${{ env.repo }}
        ref: ${{ env.ref }}

    # First choice: fetch the wheel from the build server. Failure is
    # tolerated; DOWNLOADED stays unset and the artifact step takes over.
    - name: Download wheel
      continue-on-error: true
      run: |
        # Look the wheel up with the same run id and ref that the download
        # URL below (and test_gpu) use. The previous github.run_id /
        # github.ref pair ignored the artifact_id and ref inputs, so the
        # lookup and the download could point at different runs.
        file_name=$(curl -s -F "runid=${{ needs.check-vm.outputs.run_id }}" -F "repo=${{ env.repo }}" -F "ref=${{ env.ref }}" -F "fuzz=1" "http://${{ needs.check-vm.outputs.ip }}/gpu/whl/download")
        if echo "$file_name" | grep -q "gptqmodel"; then
          mkdir dist || true
          cd dist
          curl -s -O http://${{ needs.check-vm.outputs.ip }}/whl/${{ env.repo }}/${{ needs.check-vm.outputs.run_id }}/$file_name
          ls -ahl .
          sha256=$(sha256sum $file_name)
          echo "sha256=$sha256"
          echo "DOWNLOADED=1" >> "$GITHUB_ENV"
        fi

    - name: Download artifact
      if: env.DOWNLOADED == ''
      uses: actions/download-artifact@v4
      with:
        name: dist
        path: dist
        run-id: ${{ needs.check-vm.outputs.run_id }}

    - name: Install wheel
      run: pip install dist/*.whl -i http://${{ needs.check-vm.outputs.ip }}/simple/ --trusted-host ${{ needs.check-vm.outputs.ip }}

    - name: Run tests
      if: ${{ !github.event.inputs.test_names || contains(github.event.inputs.test_names, matrix.test_script) }}
      run: pytest --durations=0 tests/${{ matrix.test_script }}.py