Large model integration tests #401
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Integration tests for large-model serving containers.
# Runs on a daily schedule and can also be started manually.
name: "Large model integration tests"

on:
  # Manual trigger. Both inputs are optional and default to the empty string.
  workflow_dispatch:
    inputs:
      # Forwarded to docker_name_builder.sh by every test job.
      djl-version:
        description: "The released version of DJL"
        required: false
        default: ""
      # Test-group selector; each test job's `if:` accepts either "" or its
      # own group name, so leaving this empty runs everything.
      run_test:
        description: "Run only the tests you need [ds, hf, aot, trtllm, lora-correctness, smoothquant]"
        required: false
        default: ""
  # Daily run at 15:00 (GitHub evaluates cron schedules in UTC).
  schedule:
    - cron: "0 15 * * *"

jobs:
# Starts three G5 instances via start_instance.sh, each with a freshly
# requested runner registration token, and exposes their instance ids as job
# outputs so that stop-runners can shut them down afterwards.
create-runners:
  runs-on: [self-hosted, scheduler]
  steps:
    - name: Create new G5 instance
      id: create_gpu
      run: |
        # Without pipefail the exit status of `curl --fail` is hidden by the
        # jq/tr stages, and an instance would be started with an empty token.
        set -o pipefail
        cd /home/ubuntu/djl_benchmark_script/scripts
        token=$( curl -X POST -H "Authorization: token ${{ secrets.ACTION_RUNNER_PERSONAL_TOKEN }}" \
          https://api.github.com/repos/deepjavalibrary/djl-serving/actions/runners/registration-token \
          --fail \
          | jq '.token' | tr -d '"' )
        ./start_instance.sh action_g5 "$token" djl-serving
    - name: Create new G5 instance
      id: create_gpu2
      run: |
        # See the first step: make a failed token request abort the step.
        set -o pipefail
        cd /home/ubuntu/djl_benchmark_script/scripts
        token=$( curl -X POST -H "Authorization: token ${{ secrets.ACTION_RUNNER_PERSONAL_TOKEN }}" \
          https://api.github.com/repos/deepjavalibrary/djl-serving/actions/runners/registration-token \
          --fail \
          | jq '.token' | tr -d '"' )
        ./start_instance.sh action_g5 "$token" djl-serving
    - name: Create new G5 instance
      id: create_gpu3
      run: |
        # See the first step: make a failed token request abort the step.
        set -o pipefail
        cd /home/ubuntu/djl_benchmark_script/scripts
        token=$( curl -X POST -H "Authorization: token ${{ secrets.ACTION_RUNNER_PERSONAL_TOKEN }}" \
          https://api.github.com/repos/deepjavalibrary/djl-serving/actions/runners/registration-token \
          --fail \
          | jq '.token' | tr -d '"' )
        ./start_instance.sh action_g5 "$token" djl-serving
  # NOTE(review): the action_g5_instance_id step output is presumably written
  # by start_instance.sh - confirm against that script.
  outputs:
    gpu_instance_id_1: ${{ steps.create_gpu.outputs.action_g5_instance_id }}
    gpu_instance_id_2: ${{ steps.create_gpu2.outputs.action_g5_instance_id }}
    gpu_instance_id_3: ${{ steps.create_gpu3.outputs.action_g5_instance_id }}
# Runs the `deepspeed_raw` prepare/client test pair for bloom-7b1 and gpt-j-6b
# against the djl-serving deepspeed container. Selected by run_test "" or "ds".
ds-raw-test:
  if: contains(fromJson('["", "ds"]'), github.event.inputs.run_test)
  runs-on: [ self-hosted, g5 ]
  timeout-minutes: 60
  needs: create-runners
  steps:
    - uses: actions/checkout@v3
    - name: Clean env
      run: |
        yes | docker system prune -a --volumes
        sudo rm -rf /home/ubuntu/actions-runner/_work/_tool/Java_Corretto_jdk/
        echo "wait dpkg lock..."
        while sudo fuser /var/{lib/{dpkg,apt/lists},cache/apt/archives}/lock >/dev/null 2>&1; do sleep 5; done
    - name: Set up Python3
      uses: actions/setup-python@v4
      with:
        python-version: '3.10.x'
    - name: Install pip dependencies
      run: pip3 install requests numpy
    # NOTE(review): DJLSERVING_DOCKER_TAG used below is presumably exported by
    # this script - confirm.
    - name: Build container name
      run: ./serving/docker/scripts/docker_name_builder.sh deepspeed ${{ github.event.inputs.djl-version }}
    - name: Download models and dockers
      working-directory: tests/integration
      run: |
        docker pull deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG
    - name: Test bloom-7b
      working-directory: tests/integration
      run: |
        rm -rf models
        python3 llm/prepare.py deepspeed_raw bloom-7b1
        ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models deepspeed \
          serve -m test=file:/opt/ml/model/test/
        python3 llm/client.py deepspeed_raw bloom-7b1
        docker rm -f $(docker ps -aq)
    - name: Test GPTJ-6B
      working-directory: tests/integration
      run: |
        rm -rf models
        python3 llm/prepare.py deepspeed_raw gpt-j-6b
        ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models deepspeed \
          serve -m test=file:/opt/ml/model/test/
        python3 llm/client.py deepspeed_raw gpt-j-6b
        docker rm -f $(docker ps -aq)
    - name: On fail step
      if: ${{ failure() }}
      working-directory: tests/integration
      run: |
        docker rm -f $(docker ps -aq) || true
        cat logs/serving.log
    # Steps default to `if: success()`, so without an explicit condition the
    # logs would be dropped exactly when a test step failed.
    - name: Upload test logs
      if: always()
      uses: actions/upload-artifact@v3
      with:
        name: ds-raw-logs
        path: tests/integration/logs/
# Runs the `huggingface` prepare/client test pair for a series of models,
# including LoRA and streaming variants. Selected by run_test "" or "hf".
hf-handler-test:
  if: contains(fromJson('["", "hf"]'), github.event.inputs.run_test)
  runs-on: [ self-hosted, g5 ]
  timeout-minutes: 60
  needs: create-runners
  strategy:
    matrix:
      arch: [ deepspeed ]
  steps:
    - uses: actions/checkout@v3
    - name: Clean env
      run: |
        yes | docker system prune -a --volumes
        sudo rm -rf /home/ubuntu/actions-runner/_work/_tool/Java_Corretto_jdk/
        echo "wait dpkg lock..."
        while sudo fuser /var/{lib/{dpkg,apt/lists},cache/apt/archives}/lock >/dev/null 2>&1; do sleep 5; done
    - name: Set up Python3
      uses: actions/setup-python@v4
      with:
        python-version: '3.10.x'
    - name: Install pip dependencies
      run: pip3 install requests numpy huggingface_hub
    # NOTE(review): DJLSERVING_DOCKER_TAG used below is presumably exported by
    # this script - confirm.
    - name: Build container name
      run: ./serving/docker/scripts/docker_name_builder.sh ${{ matrix.arch }} ${{ github.event.inputs.djl-version }}
    - name: Download models and dockers
      working-directory: tests/integration
      run: |
        docker pull deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG
    - name: Test gpt-neo
      working-directory: tests/integration
      run: |
        rm -rf models
        python3 llm/prepare.py huggingface gpt-neo-2.7b
        ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models deepspeed \
          serve -m test=file:/opt/ml/model/test/
        python3 llm/client.py huggingface gpt-neo-2.7b
        docker rm -f $(docker ps -aq)
    - name: Test bloom-7b
      working-directory: tests/integration
      run: |
        rm -rf models
        python3 llm/prepare.py huggingface bloom-7b1
        ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models deepspeed \
          serve
        python3 llm/client.py huggingface bloom-7b1
        docker rm -f $(docker ps -aq)
    - name: Test LLAMA-7b
      working-directory: tests/integration
      run: |
        rm -rf models
        python3 llm/prepare.py huggingface open-llama-7b
        ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models deepspeed \
          serve
        python3 llm/client.py huggingface open-llama-7b
        docker rm -f $(docker ps -aq)
    - name: Test GPTJ-6B
      working-directory: tests/integration
      run: |
        rm -rf models
        python3 llm/prepare.py huggingface gpt-j-6b
        ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models deepspeed \
          serve
        python3 llm/client.py huggingface gpt-j-6b
        docker rm -f $(docker ps -aq)
    - name: Test gpt4all-lora
      working-directory: tests/integration
      run: |
        rm -rf models
        python3 llm/prepare.py huggingface gpt4all-lora
        ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models deepspeed \
          serve
        python3 llm/client.py huggingface gpt4all-lora
        docker rm -f $(docker ps -aq)
    # docker_env is written here and overwritten by the next step's `>`.
    - name: Test batch unmerged lora - llama7b
      working-directory: tests/integration
      run: |
        rm -rf models
        echo -en "ENABLE_ADAPTERS_PREVIEW=true" > docker_env
        python3 llm/prepare.py huggingface llama-7b-unmerged-lora
        ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models deepspeed \
          serve
        python3 llm/client.py huggingface llama-7b-unmerged-lora
        docker rm -f $(docker ps -aq)
    - name: Test streaming bigscience/bloom-3b
      working-directory: tests/integration
      run: |
        rm -rf models
        echo -en "CUDA_VISIBLE_DEVICES=1,2" > docker_env
        python3 llm/prepare.py huggingface bigscience/bloom-3b
        ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models deepspeed \
          serve
        python3 llm/client.py huggingface bigscience/bloom-3b
        rm -rf docker_env
        docker rm -f $(docker ps -aq)
    - name: Test streaming t5-large
      working-directory: tests/integration
      run: |
        rm -rf models
        echo -en "CUDA_VISIBLE_DEVICES=1" > docker_env
        python3 llm/prepare.py huggingface t5-large
        ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models deepspeed \
          serve
        python3 llm/client.py huggingface t5-large
        rm -rf docker_env
        docker rm -f $(docker ps -aq)
    - name: On fail step
      if: ${{ failure() }}
      working-directory: tests/integration
      run: |
        docker rm -f $(docker ps -aq) || true
        cat logs/serving.log
    # Steps default to `if: success()`, so without an explicit condition the
    # logs would be dropped exactly when a test step failed.
    - name: Upload test logs
      if: always()
      uses: actions/upload-artifact@v3
      with:
        name: hf-handler-${{ matrix.arch }}-logs
        path: tests/integration/logs/
# Runs the `unmerged_lora` prepare/client test pair for llama-7b-unmerged-lora
# with ENABLE_ADAPTERS_PREVIEW set. Selected by run_test "" or
# "lora-correctness".
hf-lora-correctness-test:
  if: contains(fromJson('["", "lora-correctness"]'), github.event.inputs.run_test)
  runs-on: [ self-hosted, g5 ]
  timeout-minutes: 60
  needs: create-runners
  steps:
    - uses: actions/checkout@v3
    - name: Clean env
      run: |
        yes | docker system prune -a --volumes
        sudo rm -rf /home/ubuntu/actions-runner/_work/_tool/Java_Corretto_jdk/
        echo "wait dpkg lock..."
        while sudo fuser /var/{lib/{dpkg,apt/lists},cache/apt/archives}/lock >/dev/null 2>&1; do sleep 5; done
    - name: Set up Python3
      uses: actions/setup-python@v4
      with:
        python-version: '3.10.x'
    - name: Install pip dependencies
      run: pip3 install requests numpy
    # NOTE(review): DJLSERVING_DOCKER_TAG used below is presumably exported by
    # this script - confirm.
    - name: Build container name
      run: ./serving/docker/scripts/docker_name_builder.sh deepspeed ${{ github.event.inputs.djl-version }}
    - name: Download models and dockers
      working-directory: tests/integration
      run: |
        docker pull deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG
    - name: Test unmerged lora llama-7b correctness
      working-directory: tests/integration
      run: |
        rm -rf models
        echo -en "ENABLE_ADAPTERS_PREVIEW=true" > docker_env
        python3 llm/prepare.py unmerged_lora llama-7b-unmerged-lora
        ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models deepspeed \
          serve
        python3 llm/client.py unmerged_lora llama-7b-unmerged-lora
        docker rm -f $(docker ps -aq)
    - name: On fail step
      if: ${{ failure() }}
      working-directory: tests/integration
      run: |
        docker rm -f $(docker ps -aq) || true
        cat logs/serving.log
    # Steps default to `if: success()`, so without an explicit condition the
    # logs would be dropped exactly when a test step failed.
    - name: Upload test logs
      if: always()
      uses: actions/upload-artifact@v3
      with:
        # This job defines no matrix, so the artifact name is static; the
        # previous `matrix.arch` reference expanded to an empty string.
        name: hf-lora-correctness-logs
        path: tests/integration/logs/
# Runs the `stable-diffusion` and `deepspeed` prepare/client test pairs for a
# series of models. Selected by run_test "" or "ds".
ds-handler-test:
  if: contains(fromJson('["", "ds"]'), github.event.inputs.run_test)
  runs-on: [ self-hosted, g5 ]
  timeout-minutes: 60
  needs: create-runners
  steps:
    - uses: actions/checkout@v3
    - name: Clean env
      run: |
        yes | docker system prune -a --volumes
        sudo rm -rf /home/ubuntu/actions-runner/_work/_tool/Java_Corretto_jdk/
        echo "wait dpkg lock..."
        while sudo fuser /var/{lib/{dpkg,apt/lists},cache/apt/archives}/lock >/dev/null 2>&1; do sleep 5; done
    - name: Set up Python3
      uses: actions/setup-python@v4
      with:
        python-version: '3.10.x'
    - name: Install pip dependencies
      run: pip3 install requests pillow numpy
    # NOTE(review): DJLSERVING_DOCKER_TAG used below is presumably exported by
    # this script - confirm.
    - name: Build container name
      run: ./serving/docker/scripts/docker_name_builder.sh deepspeed ${{ github.event.inputs.djl-version }}
    - name: Download models and dockers
      working-directory: tests/integration
      run: |
        docker pull deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG
    - name: Test stable-diffusion-2-1-base
      working-directory: tests/integration
      run: |
        rm -rf models
        python3 llm/prepare.py stable-diffusion stable-diffusion-2-1-base
        ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models deepspeed \
          serve
        python3 llm/client.py stable-diffusion stable-diffusion-2-1-base
        docker rm -f $(docker ps -aq)
    - name: Test stable-diffusion-v1-5
      working-directory: tests/integration
      run: |
        rm -rf models
        python3 llm/prepare.py stable-diffusion stable-diffusion-v1-5
        ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models deepspeed \
          serve
        python3 llm/client.py stable-diffusion stable-diffusion-v1-5
        docker rm -f $(docker ps -aq)
    - name: Test stable-diffusion-2-depth
      working-directory: tests/integration
      run: |
        rm -rf models
        python3 llm/prepare.py stable-diffusion stable-diffusion-2-depth
        ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models deepspeed \
          serve
        python3 llm/client.py stable-diffusion stable-diffusion-2-depth
        docker rm -f $(docker ps -aq)
    - name: Test bloom-7b
      working-directory: tests/integration
      run: |
        rm -rf models
        python3 llm/prepare.py deepspeed bloom-7b1
        ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models deepspeed \
          serve
        python3 llm/client.py deepspeed bloom-7b1
        docker rm -f $(docker ps -aq)
    - name: Test LLAMA-7B
      working-directory: tests/integration
      run: |
        rm -rf models
        python3 llm/prepare.py deepspeed open-llama-7b
        ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models deepspeed \
          serve
        python3 llm/client.py deepspeed open-llama-7b
        docker rm -f $(docker ps -aq)
    - name: Test GPTJ-6B
      working-directory: tests/integration
      run: |
        rm -rf models
        python3 llm/prepare.py deepspeed gpt-j-6b
        ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models deepspeed \
          serve
        python3 llm/client.py deepspeed gpt-j-6b
        docker rm -f $(docker ps -aq)
    - name: Test OPT-13B
      working-directory: tests/integration
      run: |
        rm -rf models
        python3 llm/prepare.py deepspeed opt-13b
        ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models deepspeed \
          serve
        python3 llm/client.py deepspeed opt-13b
        docker rm -f $(docker ps -aq)
    # docker_env is written here and overwritten by the next step's `>`.
    - name: Test gpt4all-lora
      working-directory: tests/integration
      run: |
        rm -rf models
        echo -en "ENABLE_ADAPTERS_PREVIEW=true" > docker_env
        python3 llm/prepare.py deepspeed gpt4all-lora
        ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models deepspeed \
          serve
        python3 llm/client.py deepspeed gpt4all-lora
        docker rm -f $(docker ps -aq)
    - name: Test streaming gpt-neo-1.3b
      working-directory: tests/integration
      run: |
        rm -rf models
        echo -en "CUDA_VISIBLE_DEVICES=1,3" > docker_env
        python3 llm/prepare.py deepspeed gpt-neo-1.3b
        ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models deepspeed \
          serve
        python3 llm/client.py deepspeed gpt-neo-1.3b
        rm -rf docker_env
        docker rm -f $(docker ps -aq)
    - name: On fail step
      if: ${{ failure() }}
      working-directory: tests/integration
      run: |
        docker rm -f $(docker ps -aq) || true
        cat logs/serving.log
    # Steps default to `if: success()`, so without an explicit condition the
    # logs would be dropped exactly when a test step failed.
    - name: Upload test logs
      if: always()
      uses: actions/upload-artifact@v3
      with:
        name: ds-handler-logs
        path: tests/integration/logs/
# Partitions gpt-neo-2.7b with the `partition` command, checks that *.pt
# checkpoint files were produced, then serves the partitioned model and runs
# the `deepspeed_aot` client against it. Selected by run_test "" or "aot".
ds-aot-raw-test:
  if: contains(fromJson('["", "aot"]'), github.event.inputs.run_test)
  runs-on: [ self-hosted, g5 ]
  timeout-minutes: 60
  needs: create-runners
  steps:
    - uses: actions/checkout@v3
    - name: Clean env
      run: |
        yes | docker system prune -a --volumes
        sudo rm -rf /home/ubuntu/actions-runner/_work/_tool/Java_Corretto_jdk/
        echo "wait dpkg lock..."
        while sudo fuser /var/{lib/{dpkg,apt/lists},cache/apt/archives}/lock >/dev/null 2>&1; do sleep 5; done
    - name: Set up Python3
      uses: actions/setup-python@v4
      with:
        python-version: '3.10.x'
    - name: Install pip dependencies
      run: pip3 install requests numpy
    - name: Install s5cmd
      working-directory: serving/docker
      run: sudo scripts/install_s5cmd.sh x64
    # NOTE(review): DJLSERVING_DOCKER_TAG used below is presumably exported by
    # this script - confirm.
    - name: Build container name
      run: ./serving/docker/scripts/docker_name_builder.sh deepspeed ${{ github.event.inputs.djl-version }}
    - name: Download models and dockers
      working-directory: serving/docker
      run: |
        docker pull deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG
    - name: Test gpt-neo-2.7b partition
      working-directory: tests/integration
      run: |
        sudo rm -rf models
        python3 llm/prepare.py deepspeed_aot gpt-neo-2.7b
        ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models deepspeed \
          partition --model-dir /opt/ml/input/data/training | tee partition_output.log
        # checking if pt files are generated.
        sudo mv $PWD/models/test/partition-test $PWD/models/
        if ls $PWD/models/partition-test/*.pt &>/dev/null ; then echo "checkpoint files generated"; else exit 1; fi
    # Relies on the models directory left behind by the partition step above.
    - name: Test gpt-neo-2.7b inference
      working-directory: tests/integration
      run: |
        sudo cp $PWD/models/test/model.py $PWD/models/partition-test
        ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models deepspeed \
          serve -m test=file:/opt/ml/model/partition-test/
        curl http://127.0.0.1:8080/models
        python3 llm/client.py deepspeed_aot gpt-neo-2.7b
        docker rm -f $(docker ps -aq)
    - name: Remove models dir
      working-directory: tests/integration
      run: |
        sudo rm -rf models
    - name: On fail step
      if: ${{ failure() }}
      working-directory: tests/integration
      run: |
        sudo rm -rf models
        docker rm -f $(docker ps -aq) || true
        cat logs/serving.log
    # Steps default to `if: success()`, so without an explicit condition the
    # logs would be dropped exactly when a test step failed.
    - name: Upload test logs
      if: always()
      uses: actions/upload-artifact@v3
      with:
        # This job defines no matrix, so the artifact name is static; the
        # previous `matrix.arch` reference expanded to an empty string.
        name: ds-aot-raw-logs
        path: tests/integration/logs/
# Partitions opt-6.7b and bloom-7b1 through the `deepspeed_handler_aot` test
# setup, verifies the generated artifacts, and runs inference against each
# partitioned model. Selected by run_test "" or "aot".
ds-handler-aot-test:
  if: contains(fromJson('["", "aot"]'), github.event.inputs.run_test)
  runs-on: [ self-hosted, g5 ]
  timeout-minutes: 60
  needs: create-runners
  steps:
    - uses: actions/checkout@v3
    - name: Clean env
      run: |
        yes | docker system prune -a --volumes
        sudo rm -rf /home/ubuntu/actions-runner/_work/_tool/Java_Corretto_jdk/
        echo "wait dpkg lock..."
        while sudo fuser /var/{lib/{dpkg,apt/lists},cache/apt/archives}/lock >/dev/null 2>&1; do sleep 5; done
    - name: Set up Python3
      uses: actions/setup-python@v4
      with:
        python-version: '3.10.x'
    - name: Install pip dependencies
      run: pip3 install requests numpy
    - name: Install s5cmd
      working-directory: serving/docker
      run: sudo scripts/install_s5cmd.sh x64
    # NOTE(review): DJLSERVING_DOCKER_TAG used below is presumably exported by
    # this script - confirm.
    - name: Build container name
      run: ./serving/docker/scripts/docker_name_builder.sh deepspeed ${{ github.event.inputs.djl-version }}
    - name: Download models and dockers
      working-directory: serving/docker
      run: |
        docker pull deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG
    - name: Test opt-6.7b partition
      working-directory: tests/integration
      run: |
        sudo rm -rf models
        python3 llm/prepare.py deepspeed_handler_aot opt-6.7b
        # To test the requirements.txt download.
        echo "dummy_test" >> $PWD/models/test/requirements.txt
        ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models deepspeed \
          partition --model-dir /opt/ml/input/data/training/ | tee partition_output.log
        # checking if pt files are generated.
        sudo mv $PWD/models/test/partition-test $PWD/models/
        if ls $PWD/models/partition-test/*.pt &>/dev/null ; then echo "checkpoint files generated"; else exit 1; fi
        # checking whether requirements.txt download is successful
        if grep -F "pip install requirements succeed!" partition_output.log &>/dev/null; \
          then echo "requirements.txt install was successful"; else exit 1; fi
    # Relies on the models directory left behind by the partition step above.
    - name: Test opt-6.7b inference
      working-directory: tests/integration
      run: |
        ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models deepspeed \
          serve -m test=file:/opt/ml/model/partition-test/
        python3 llm/client.py deepspeed_handler_aot opt-6.7b
        docker rm -f $(docker ps -aq)
        sudo rm -rf models
    - name: Test bloom-7b1 partition
      working-directory: tests/integration
      run: |
        sudo rm -rf models
        python3 llm/prepare.py deepspeed_handler_aot bloom-7b1
        ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models deepspeed \
          train | tee partition_output.log
        # checking if pt files are generated.
        # downloads the uploaded partitioned checkpoints from s3url.
        /opt/djl/bin/s5cmd --retry-count 1 sync s3://djl-llm/bloom-7b1-tp4/ds-aot-handler/* $PWD/models/partition-test
        if ls $PWD/models/partition-test/*.pt &>/dev/null ; then echo "checkpoint files generated"; else exit 1; fi
        if ls $PWD/models/partition-test/ds_inference_config.json &>/dev/null ; \
          then echo "ds_inference_config.json generated"; else exit 1; fi
    - name: Test bloom-7b1 inference
      working-directory: tests/integration
      run: |
        ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models deepspeed \
          serve -m test=file:/opt/ml/model/partition-test/
        python3 llm/client.py deepspeed_handler_aot bloom-7b1
        docker rm -f $(docker ps -aq)
    - name: Remove models dir
      working-directory: tests/integration
      run: |
        sudo rm -rf models
    - name: On fail step
      if: ${{ failure() }}
      working-directory: tests/integration
      run: |
        sudo rm -rf models
        docker rm -f $(docker ps -aq) || true
        cat logs/serving.log
    # The tail of this job was truncated to a stray `name:` line; the log dump
    # above and this upload step are restored to match the sibling jobs.
    # Steps default to `if: success()`, so the explicit condition is needed to
    # keep the logs when a test step failed.
    - name: Upload test logs
      if: always()
      uses: actions/upload-artifact@v3
      with:
        name: ds-aot-handler-logs
        path: tests/integration/logs/
# Launches the container in `no_code` mode, with the model selected only via
# HF_MODEL_ID in docker_env, and runs the `huggingface` client against it.
# Selected by run_test "" or "hf".
no-code-test:
  if: contains(fromJson('["", "hf"]'), github.event.inputs.run_test)
  runs-on: [ self-hosted, g5 ]
  timeout-minutes: 60
  needs: create-runners
  steps:
    - uses: actions/checkout@v3
    - name: Clean env
      run: |
        yes | docker system prune -a --volumes
        sudo rm -rf /home/ubuntu/actions-runner/_work/_tool/Java_Corretto_jdk/
        echo "wait dpkg lock..."
        while sudo fuser /var/{lib/{dpkg,apt/lists},cache/apt/archives}/lock >/dev/null 2>&1; do sleep 5; done
    - name: Set up Python3
      uses: actions/setup-python@v4
      with:
        python-version: '3.10.x'
    - name: Install pip dependencies
      run: pip3 install requests numpy
    # NOTE(review): DJLSERVING_DOCKER_TAG used below is presumably exported by
    # this script - confirm.
    - name: Build container name
      run: ./serving/docker/scripts/docker_name_builder.sh deepspeed ${{ github.event.inputs.djl-version }}
    - name: Download models and dockers
      working-directory: serving/docker
      run: |
        docker pull deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG
    - name: Test nomic-ai/gpt4all-j inference
      working-directory: tests/integration
      run: |
        echo -en "HF_MODEL_ID=s3://djl-llm/gpt4all-j/" > docker_env
        ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG no_code deepspeed
        python3 llm/client.py huggingface no-code/nomic-ai/gpt4all-j
        docker rm -f $(docker ps -aq)
        sudo rm -rf models
    - name: Test databricks/dolly-v2-7b inference
      working-directory: tests/integration
      run: |
        echo -en "HF_MODEL_ID=s3://djl-llm/dolly-v2-7b\nTENSOR_PARALLEL_DEGREE=2" > docker_env
        ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG no_code deepspeed
        python3 llm/client.py huggingface no-code/databricks/dolly-v2-7b
        docker rm -f $(docker ps -aq)
        sudo rm -rf models
    - name: Test google/flan-t5-xl inference
      working-directory: tests/integration
      run: |
        echo -en "HF_MODEL_ID=s3://djl-llm/flan-t5-xl\nTENSOR_PARALLEL_DEGREE=2" > docker_env
        ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG no_code deepspeed
        python3 llm/client.py huggingface no-code/google/flan-t5-xl
        docker rm -f $(docker ps -aq)
        sudo rm -rf models
    - name: On fail step
      if: ${{ failure() }}
      working-directory: tests/integration
      run: |
        docker rm -f $(docker ps -aq) || true
        sudo rm -rf models
        cat logs/serving.log
    # Steps default to `if: success()`, so without an explicit condition the
    # logs would be dropped exactly when a test step failed.
    - name: Upload test logs
      if: always()
      uses: actions/upload-artifact@v3
      with:
        name: huggingface-logs
        path: tests/integration/logs/
# Runs the `deepspeed_smoothquant` prepare/client test pair for several
# quantization configurations. Selected by run_test "" or "smoothquant".
ds-smoothquant-handler-test:
  if: contains(fromJson('["", "smoothquant"]'), github.event.inputs.run_test)
  runs-on: [ self-hosted, g5 ]
  timeout-minutes: 60
  needs: create-runners
  steps:
    - uses: actions/checkout@v3
    - name: Clean env
      run: |
        yes | docker system prune -a --volumes
        sudo rm -rf /home/ubuntu/actions-runner/_work/_tool/Java_Corretto_jdk/
        echo "wait dpkg lock..."
        while sudo fuser /var/{lib/{dpkg,apt/lists},cache/apt/archives}/lock >/dev/null 2>&1; do sleep 5; done
    - name: Set up Python3
      uses: actions/setup-python@v4
      with:
        python-version: '3.10.x'
    - name: Install pip dependencies
      run: pip3 install requests pillow numpy
    # NOTE(review): DJLSERVING_DOCKER_TAG used below is presumably exported by
    # this script - confirm.
    - name: Build container name
      run: ./serving/docker/scripts/docker_name_builder.sh deepspeed ${{ github.event.inputs.djl-version }}
    - name: Download models and dockers
      working-directory: tests/integration
      run: |
        docker pull deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG
    - name: Test gpt-j-6b default smoothquant
      working-directory: tests/integration
      run: |
        rm -rf models
        python3 llm/prepare.py deepspeed_smoothquant gpt-j-6b
        ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models deepspeed \
          serve
        python3 llm/client.py deepspeed_smoothquant gpt-j-6b
        docker rm -f $(docker ps -aq)
    - name: Test gpt-neox-20b smoothquant custom alpha
      working-directory: tests/integration
      run: |
        rm -rf models
        python3 llm/prepare.py deepspeed_smoothquant gpt-neox-20b
        ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models deepspeed \
          serve
        python3 llm/client.py deepspeed_smoothquant gpt-neox-20b
        docker rm -f $(docker ps -aq)
    - name: Test llama2-13b dynamic_quant only
      working-directory: tests/integration
      run: |
        rm -rf models
        python3 llm/prepare.py deepspeed_smoothquant llama2-13b-dynamic-int8
        ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models deepspeed \
          serve
        python3 llm/client.py deepspeed_smoothquant llama2-13b-dynamic-int8
        docker rm -f $(docker ps -aq)
    - name: Test llama2-13b smoothquant
      working-directory: tests/integration
      run: |
        rm -rf models
        python3 llm/prepare.py deepspeed_smoothquant llama2-13b-smoothquant
        ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models deepspeed \
          serve
        python3 llm/client.py deepspeed_smoothquant llama2-13b-smoothquant
        docker rm -f $(docker ps -aq)
    - name: Remove models dir
      working-directory: tests/integration
      run: |
        sudo rm -rf models
    - name: On fail step
      if: ${{ failure() }}
      working-directory: tests/integration
      run: |
        sudo rm -rf models
        docker rm -f $(docker ps -aq) || true
        cat logs/serving.log
    # Steps default to `if: success()`, so without an explicit condition the
    # logs would be dropped exactly when a test step failed.
    - name: Upload test logs
      if: always()
      uses: actions/upload-artifact@v3
      with:
        name: ds-smoothquant-logs
        path: tests/integration/logs/
# Runs the `trtllm` prepare/client test pair for several models against the
# tensorrt-llm container, pinning GPUs through CUDA_VISIBLE_DEVICES in
# docker_env. Selected by run_test "" or "trtllm".
trt-llm-handler-test:
  if: contains(fromJson('["", "trtllm"]'), github.event.inputs.run_test)
  runs-on: [ self-hosted, g5 ]
  timeout-minutes: 120
  needs: create-runners
  steps:
    - uses: actions/checkout@v3
    - name: Clean env
      run: |
        yes | docker system prune -a --volumes
        sudo rm -rf /home/ubuntu/actions-runner/_work/_tool/Java_Corretto_jdk/
        echo "wait dpkg lock..."
        while sudo fuser /var/{lib/{dpkg,apt/lists},cache/apt/archives}/lock >/dev/null 2>&1; do sleep 5; done
    - name: Set up Python3
      uses: actions/setup-python@v4
      with:
        python-version: '3.10.x'
    - name: Install pip dependencies
      run: pip3 install requests numpy huggingface_hub
    # NOTE(review): DJLSERVING_DOCKER_TAG used below is presumably exported by
    # this script - confirm.
    - name: Build container name
      run: ./serving/docker/scripts/docker_name_builder.sh tensorrt-llm ${{ github.event.inputs.djl-version }}
    - name: Download models and dockers
      working-directory: tests/integration
      run: |
        docker pull deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG
    - name: llama2-13b HF model with tp=4
      working-directory: tests/integration
      run: |
        rm -rf models
        echo -en "CUDA_VISIBLE_DEVICES=0,1,2,3" > docker_env
        python3 llm/prepare.py trtllm llama2-13b
        ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models trtllm \
          serve
        python3 llm/client.py trtllm llama2-13b
        rm -rf docker_env
        docker rm -f $(docker ps -aq)
    - name: falcon-7b triton repo with tp=1
      working-directory: tests/integration
      run: |
        rm -rf models
        echo -en "CUDA_VISIBLE_DEVICES=0" > docker_env
        python3 llm/prepare.py trtllm falcon-7b
        ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models trtllm \
          serve
        python3 llm/client.py trtllm falcon-7b
        rm -rf docker_env
        docker rm -f $(docker ps -aq)
    - name: internlm-7b HF model with tp=4
      working-directory: tests/integration
      run: |
        rm -rf models
        echo -en "CUDA_VISIBLE_DEVICES=0,1,2,3" > docker_env
        python3 llm/prepare.py trtllm internlm-7b
        ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models trtllm \
          serve
        python3 llm/client.py trtllm internlm-7b
        rm -rf docker_env
        docker rm -f $(docker ps -aq)
    - name: baichuan2-13b HF model with tp=4
      working-directory: tests/integration
      run: |
        rm -rf models
        echo -en "CUDA_VISIBLE_DEVICES=0,1,2,3" > docker_env
        python3 llm/prepare.py trtllm baichuan2-13b
        ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models trtllm \
          serve
        python3 llm/client.py trtllm baichuan2-13b
        rm -rf docker_env
        docker rm -f $(docker ps -aq)
    - name: On fail step
      if: ${{ failure() }}
      working-directory: tests/integration
      run: |
        docker rm -f $(docker ps -aq) || true
        cat logs/serving.log
    # Steps default to `if: success()`, so without an explicit condition the
    # logs would be dropped exactly when a test step failed.
    - name: Upload test logs
      if: always()
      uses: actions/upload-artifact@v3
      with:
        name: trtllm-handler-logs
        path: tests/integration/logs/
# Runs the `trtllm` prepare/client test pair for llama2-7b-smoothquant against
# the tensorrt-llm container, launched with the `trtllm-sq` platform argument.
# Selected by run_test "" or "trtllm".
trt-llm-handler-quantization-test:
  if: contains(fromJson('["", "trtllm"]'), github.event.inputs.run_test)
  runs-on: [ self-hosted, g5 ]
  timeout-minutes: 120
  needs: create-runners
  steps:
    - uses: actions/checkout@v3
    - name: Clean env
      run: |
        yes | docker system prune -a --volumes
        sudo rm -rf /home/ubuntu/actions-runner/_work/_tool/Java_Corretto_jdk/
        echo "wait dpkg lock..."
        while sudo fuser /var/{lib/{dpkg,apt/lists},cache/apt/archives}/lock >/dev/null 2>&1; do sleep 5; done
    - name: Set up Python3
      uses: actions/setup-python@v4
      with:
        python-version: '3.10.x'
    - name: Install pip dependencies
      run: pip3 install requests numpy huggingface_hub
    # NOTE(review): DJLSERVING_DOCKER_TAG used below is presumably exported by
    # this script - confirm.
    - name: Build container name
      run: ./serving/docker/scripts/docker_name_builder.sh tensorrt-llm ${{ github.event.inputs.djl-version }}
    - name: Download models and dockers
      working-directory: tests/integration
      run: |
        docker pull deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG
    - name: llama2-7b HF model with tp=4 and smoothquant
      working-directory: tests/integration
      run: |
        rm -rf models
        echo -en "CUDA_VISIBLE_DEVICES=0,1,2,3" > docker_env
        python3 llm/prepare.py trtllm llama2-7b-smoothquant
        ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models trtllm-sq \
          serve
        python3 llm/client.py trtllm llama2-7b-smoothquant
        rm -rf docker_env
        docker rm -f $(docker ps -aq)
    - name: On fail step
      if: ${{ failure() }}
      working-directory: tests/integration
      run: |
        docker rm -f $(docker ps -aq) || true
        cat logs/serving.log
    # Steps default to `if: success()`, so without an explicit condition the
    # logs would be dropped exactly when a test step failed.
    - name: Upload test logs
      if: always()
      uses: actions/upload-artifact@v3
      with:
        name: trtllm-handler-quantization-logs
        path: tests/integration/logs/
# Tear-down job: calls stop_instance.sh for each of the three instance ids
# published by create-runners. `always()` makes it run whatever the outcome
# of the jobs it waits for.
stop-runners:
  if: always()
  runs-on:
    - self-hosted
    - scheduler
  # Waits for create-runners and every test job before stopping the instances.
  needs:
    - create-runners
    - hf-handler-test
    - hf-lora-correctness-test
    - ds-raw-test
    - ds-handler-test
    - ds-aot-raw-test
    - no-code-test
    - ds-handler-aot-test
    - ds-smoothquant-handler-test
    - trt-llm-handler-test
    - trt-llm-handler-quantization-test
  steps:
    - name: Stop all instances
      run: |
        cd /home/ubuntu/djl_benchmark_script/scripts
        instance_id=${{ needs.create-runners.outputs.gpu_instance_id_1 }}
        ./stop_instance.sh $instance_id
        instance_id=${{ needs.create-runners.outputs.gpu_instance_id_2 }}
        ./stop_instance.sh $instance_id
        instance_id=${{ needs.create-runners.outputs.gpu_instance_id_3 }}
        ./stop_instance.sh $instance_id