# Inferentia2 integration tests #99
# This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# Integration tests for DJL Serving on AWS Inferentia2 (Inf2) instances.
name: Inferentia2 integration tests

on:
  # Manual trigger; `djl-version` optionally selects a released DJL version
  # and is forwarded to docker_name_builder.sh by the test jobs.
  workflow_dispatch:
    inputs:
      djl-version:
        description: 'The released version of DJL'
        required: false
        default: ''
  # Scheduled trigger: once a day at 15:00 (GitHub evaluates cron in UTC).
  schedule:
    - cron: '0 15 * * *'
jobs:
  # Starts two instances through start_instance.sh, handing each a freshly
  # issued GitHub runner registration token. The instance ids reported by the
  # two steps are exported as job outputs so stop-runners can shut them down.
  create-runners:
    runs-on: [self-hosted, scheduler]
    steps:
      - name: Create new Inf2.24xl instance (1)
        id: create_inf2
        env:
          # Passed through the environment instead of being spliced into the
          # script text.
          RUNNER_PAT: ${{ secrets.ACTION_RUNNER_PERSONAL_TOKEN }}
        run: |
          # pipefail + `jq -e`: abort when curl fails or the response carries
          # no token, instead of launching an instance with an empty token.
          set -o pipefail
          cd /home/ubuntu/djl_benchmark_script/scripts
          token=$( curl -X POST -H "Authorization: token $RUNNER_PAT" \
            https://api.github.com/repos/deepjavalibrary/djl-serving/actions/runners/registration-token \
            --fail \
            | jq -er '.token' )
          ./start_instance.sh action_inf2 "$token" djl-serving
      - name: Create new Inf2.24xl instance (2)
        id: create_inf2_2
        env:
          RUNNER_PAT: ${{ secrets.ACTION_RUNNER_PERSONAL_TOKEN }}
        run: |
          set -o pipefail
          cd /home/ubuntu/djl_benchmark_script/scripts
          token=$( curl -X POST -H "Authorization: token $RUNNER_PAT" \
            https://api.github.com/repos/deepjavalibrary/djl-serving/actions/runners/registration-token \
            --fail \
            | jq -er '.token' )
          ./start_instance.sh action_inf2 "$token" djl-serving
    outputs:
      inf2_instance_id_1: ${{ steps.create_inf2.outputs.action_inf2_instance_id }}
      inf2_instance_id_2: ${{ steps.create_inf2_2.outputs.action_inf2_instance_id }}

  # First batch of tests: resnet18 via the PyTorch and Python engines, several
  # transformers-neuronx models through the handler, and two partition runs.
  transformers-neuronx-test-1:
    runs-on: [self-hosted, inf2]
    timeout-minutes: 90
    needs: create-runners
    steps:
      - uses: actions/checkout@v3
      - name: Clean env
        run: |
          yes | docker system prune -a --volumes
          sudo rm -rf /home/ubuntu/actions-runner/_work/_tool/Java_Corretto_jdk/
          echo "wait dpkg lock..."
          while sudo fuser /var/{lib/{dpkg,apt/lists},cache/apt/archives}/lock >/dev/null 2>&1; do sleep 5; done
      - name: Set up Python3
        uses: actions/setup-python@v4
        with:
          python-version: '3.10.x'
      - name: Install pip dependencies
        run: pip3 install requests numpy pillow
      # NOTE(review): later steps read $DJLSERVING_DOCKER_TAG, presumably
      # exported by this script - confirm.
      - name: Build container name
        env:
          # Dispatch input goes through the environment rather than being
          # interpolated into the shell command line.
          DJL_VERSION: ${{ github.event.inputs.djl-version }}
        run: |
          # Forward the version only when one was supplied, so scheduled runs
          # still invoke the script with a single argument.
          ./serving/docker/scripts/docker_name_builder.sh pytorch-inf2 ${DJL_VERSION:+"$DJL_VERSION"}
      - name: Download models and dockers
        working-directory: tests/integration
        run: |
          docker pull deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG
          mkdir logs
          ./download_models.sh pytorch-inf2
      - name: Test Pytorch model
        working-directory: tests/integration
        run: |
          ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models pytorch-inf2-1 \
            serve -m test::PyTorch:nc0=file:/opt/ml/model/resnet18_inf2_2_4.tar.gz
          ./test_client.sh image/jpg models/kitten.jpg
          docker rm -f $(docker ps -aq)
      - name: Test Python mode
        working-directory: tests/integration
        run: |
          ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models pytorch-inf2-1 \
            serve -m test::Python:nc0=file:/opt/ml/model/resnet18_inf2_2_4.tar.gz
          ./test_client.sh image/jpg models/kitten.jpg
          docker rm -f $(docker ps -aq)
      - name: Test transformers-neuronx gpt2 with handler
        working-directory: tests/integration
        run: |
          rm -rf models
          python3 llm/prepare.py transformers_neuronx gpt2
          ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models pytorch-inf2-1 \
            serve
          curl http://127.0.0.1:8080/models
          python3 llm/client.py transformers_neuronx gpt2
          docker rm -f $(docker ps -aq)
          sudo rm -rf models
      - name: Test transformers-neuronx gpt2 quantization with handler
        working-directory: tests/integration
        run: |
          rm -rf models
          python3 llm/prepare.py transformers_neuronx gpt2-quantize
          ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models pytorch-inf2-1 \
            serve
          curl http://127.0.0.1:8080/models
          python3 llm/client.py transformers_neuronx gpt2-quantize
          docker rm -f $(docker ps -aq)
          sudo rm -rf models
      - name: Test transformers-neuronx opt-1.3b with handler
        working-directory: tests/integration
        run: |
          rm -rf models
          python3 llm/prepare.py transformers_neuronx opt-1.3b
          ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models pytorch-inf2-6 \
            serve
          curl http://127.0.0.1:8080/models
          python3 llm/client.py transformers_neuronx opt-1.3b
          docker rm -f $(docker ps -aq)
          sudo rm -rf models
      - name: Test transformers-neuronx gpt-j-6b with handler
        working-directory: tests/integration
        run: |
          rm -rf models
          python3 llm/prepare.py transformers_neuronx gpt-j-6b
          ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models pytorch-inf2-6 \
            serve
          curl http://127.0.0.1:8080/models
          python3 llm/client.py transformers_neuronx gpt-j-6b
          docker rm -f $(docker ps -aq)
          sudo rm -rf models
      - name: Test transformers-neuronx pythia-2.8b with handler
        working-directory: tests/integration
        run: |
          rm -rf models
          python3 llm/prepare.py transformers_neuronx pythia-2.8b
          ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models pytorch-inf2-2 \
            serve
          curl http://127.0.0.1:8080/models
          python3 llm/client.py transformers_neuronx pythia-2.8b
          docker rm -f $(docker ps -aq)
          sudo rm -rf models
      - name: Test transformers-neuronx bloom-7b1 with handler
        working-directory: tests/integration
        run: |
          rm -rf models
          python3 llm/prepare.py transformers_neuronx bloom-7b1
          ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models pytorch-inf2-2 \
            serve
          curl http://127.0.0.1:8080/models
          python3 llm/client.py transformers_neuronx bloom-7b1
          docker rm -f $(docker ps -aq)
          sudo rm -rf models
      # NOTE(review): `| tee` hides launch_container.sh's exit status because
      # the default step shell does not enable pipefail; a failed partition is
      # only detected by the .neff and log checks that follow.
      - name: Test gpt2 partition
        working-directory: tests/integration
        run: |
          sudo rm -rf models
          python3 llm/prepare.py transformers_neuronx_aot gpt2
          # To test the requirements.txt download.
          echo "dummy_test" >> $PWD/models/test/requirements.txt
          ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models pytorch-inf2-1 \
            partition --model-dir /opt/ml/input/data/training/ --skip-copy | tee partition_output.log
          # checking if neff files are generated.
          sudo mv $PWD/models/test/partition-test $PWD/models/
          if ls $PWD/models/partition-test/compiled/*.neff &>/dev/null; \
          then echo "compiled files generated"; else exit 1; fi
          # checking whether requirements.txt download is successful
          if grep -F "pip install requirements succeed!" partition_output.log &>/dev/null; \
          then echo "requirements.txt install was successful"; else exit 1; fi
      - name: Test gpt2-quantize partition
        working-directory: tests/integration
        run: |
          sudo rm -rf models
          python3 llm/prepare.py transformers_neuronx_aot gpt2-quantize
          # To test the requirements.txt download.
          echo "dummy_test" >> $PWD/models/test/requirements.txt
          ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models pytorch-inf2-1 \
            partition --model-dir /opt/ml/input/data/training/ --skip-copy | tee partition_output.log
          # checking if neff files are generated.
          sudo mv $PWD/models/test/partition-test $PWD/models/
          if ls $PWD/models/partition-test/compiled/*.neff &>/dev/null; \
          then echo "compiled files generated"; else exit 1; fi
          # checking whether requirements.txt download is successful
          if grep -F "pip install requirements succeed!" partition_output.log &>/dev/null; \
          then echo "requirements.txt install was successful"; else exit 1; fi
      - name: On fail step
        if: ${{ failure() }}
        working-directory: tests/integration
        run: |
          cat logs/serving.log
      # Runs even after a failed test so the logs are kept when they matter
      # most. No matrix is defined in this job, so the artifact name is spelled
      # out and kept distinct from the one uploaded by the second test job.
      - name: Upload test logs
        if: always()
        uses: actions/upload-artifact@v3
        with:
          name: transformers-neuronx-1-logs
          path: tests/integration/logs/

  # Second batch of tests: open-llama, streaming opt-1.3b, the stable
  # diffusion variants and the llama2 7B loading paths.
  transformers-neuronx-test-2:
    runs-on: [self-hosted, inf2]
    timeout-minutes: 90
    needs: create-runners
    steps:
      - uses: actions/checkout@v3
      - name: Clean env
        run: |
          yes | docker system prune -a --volumes
          sudo rm -rf /home/ubuntu/actions-runner/_work/_tool/Java_Corretto_jdk/
          echo "wait dpkg lock..."
          while sudo fuser /var/{lib/{dpkg,apt/lists},cache/apt/archives}/lock >/dev/null 2>&1; do sleep 5; done
      - name: Set up Python3
        uses: actions/setup-python@v4
        with:
          python-version: '3.10.x'
      - name: Install pip dependencies
        run: pip3 install requests numpy pillow
      # NOTE(review): later steps read $DJLSERVING_DOCKER_TAG, presumably
      # exported by this script - confirm.
      - name: Build container name
        env:
          # Dispatch input goes through the environment rather than being
          # interpolated into the shell command line.
          DJL_VERSION: ${{ github.event.inputs.djl-version }}
        run: |
          # Forward the version only when one was supplied, so scheduled runs
          # still invoke the script with a single argument.
          ./serving/docker/scripts/docker_name_builder.sh pytorch-inf2 ${DJL_VERSION:+"$DJL_VERSION"}
      - name: Download models and dockers
        working-directory: tests/integration
        run: |
          docker pull deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG
          mkdir logs
          ./download_models.sh pytorch-inf2
      - name: Test transformers-neuronx open-llama-7b with handler
        working-directory: tests/integration
        run: |
          rm -rf models
          python3 llm/prepare.py transformers_neuronx open-llama-7b
          ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models pytorch-inf2-2 \
            serve
          curl http://127.0.0.1:8080/models
          python3 llm/client.py transformers_neuronx open-llama-7b
          docker rm -f $(docker ps -aq)
          sudo rm -rf models
      - name: Test streaming transformers-neuronx opt-1.3b with handler
        working-directory: tests/integration
        run: |
          rm -rf models
          python3 llm/prepare.py transformers_neuronx opt-1.3b-streaming
          ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models pytorch-inf2-6 \
            serve
          curl http://127.0.0.1:8080/models
          python3 llm/client.py transformers_neuronx opt-1.3b-streaming
          docker rm -f $(docker ps -aq)
          sudo rm -rf models
      - name: Test stable diffusion 1.5 with handler
        working-directory: tests/integration
        run: |
          rm -rf models
          python3 llm/prepare.py transformers_neuronx stable-diffusion-1.5-neuron
          ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models pytorch-inf2-2 \
            serve
          curl http://127.0.0.1:8080/models
          python3 llm/client.py neuron-stable-diffusion stable-diffusion-1.5-neuron
          docker rm -f $(docker ps -aq)
          sudo rm -rf models
      - name: Test stable diffusion bf16 with handler
        working-directory: tests/integration
        run: |
          rm -rf models
          python3 llm/prepare.py transformers_neuronx stable-diffusion-2.1-neuron
          ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models pytorch-inf2-2 \
            serve
          curl http://127.0.0.1:8080/models
          python3 llm/client.py neuron-stable-diffusion stable-diffusion-2.1-neuron
          docker rm -f $(docker ps -aq)
          sudo rm -rf models
      - name: Test stable diffusion xl with handler
        working-directory: tests/integration
        run: |
          rm -rf models
          python3 llm/prepare.py transformers_neuronx stable-diffusion-xl-neuron
          ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models pytorch-inf2-2 \
            serve
          curl http://127.0.0.1:8080/models
          python3 llm/client.py neuron-stable-diffusion stable-diffusion-xl-neuron
          docker rm -f $(docker ps -aq)
          sudo rm -rf models
      - name: Test load split model with llama2 7B in handler
        working-directory: tests/integration
        run: |
          rm -rf models
          python3 llm/prepare.py transformers_neuronx llama-7b-split
          ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models pytorch-inf2-2 \
            serve
          curl http://127.0.0.1:8080/models
          python3 llm/client.py transformers_neuronx llama-7b-split
          docker rm -f $(docker ps -aq)
          sudo rm -rf models
      - name: Test load optimum llama2 7B in handler
        working-directory: tests/integration
        run: |
          rm -rf models
          python3 llm/prepare.py transformers_neuronx llama2-7b
          ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models pytorch-inf2-2 \
            serve
          curl http://127.0.0.1:8080/models
          python3 llm/client.py transformers_neuronx llama2-7b
          docker rm -f $(docker ps -aq)
          sudo rm -rf models
      - name: On fail step
        if: ${{ failure() }}
        working-directory: tests/integration
        run: |
          cat logs/serving.log
      # Runs even after a failed test so the logs are kept when they matter
      # most. No matrix is defined in this job, so the artifact name is spelled
      # out and kept distinct from the one uploaded by the first test job.
      - name: Upload test logs
        if: always()
        uses: actions/upload-artifact@v3
        with:
          name: transformers-neuronx-2-logs
          path: tests/integration/logs/

  # Always runs, whatever the outcome of the jobs above, and stops both
  # instances recorded by create-runners.
  stop-runners:
    if: always()
    runs-on: [self-hosted, scheduler]
    needs: [create-runners, transformers-neuronx-test-1, transformers-neuronx-test-2]
    steps:
      - name: Stop all instances
        env:
          INSTANCE_ID_1: ${{ needs.create-runners.outputs.inf2_instance_id_1 }}
          INSTANCE_ID_2: ${{ needs.create-runners.outputs.inf2_instance_id_2 }}
        run: |
          cd /home/ubuntu/djl_benchmark_script/scripts
          # Attempt every instance even if an earlier stop fails, so one error
          # cannot leave the other instance running; report failure at the end.
          status=0
          for instance_id in "$INSTANCE_ID_1" "$INSTANCE_ID_2"; do
            if [ -z "$instance_id" ]; then
              # create-runners produced no id for this slot; nothing to stop.
              echo "No instance id recorded; skipping"
              continue
            fi
            ./stop_instance.sh "$instance_id" || status=1
          done
          exit $status