Inferentia2 integration tests #408

Workflow file for this run

.github/workflows/llm_inf2_integration.yml at dc52d18

	# Workflow identity and triggers for the Inferentia2 integration test suite.
	name: Inferentia2 integration tests

	on:
	  # Manual trigger; the optional input is forwarded to the container-name
	  # builder script in the test jobs.
	  workflow_dispatch:
	    inputs:
	      djl-version:
	        description: 'The released version of DJL'
	        required: false
	        default: ''
	  # Scheduled trigger: once a day at 15:00 (GitHub evaluates cron in UTC).
	  schedule:
	    - cron: '0 15 * * *'


	jobs:
	  # Starts two instances that register themselves as self-hosted runners
	  # for this repository, and publishes their ids for the stop-runners job.
	  create-runners:
	    runs-on: [self-hosted, scheduler]
	    steps:
	      - name: Create new Inf2.24xl instance (1 of 2)
	        id: create_inf2
	        # Explicit `shell: bash` runs the script with `-eo pipefail`, so a
	        # failing `curl --fail` aborts the step instead of being hidden by
	        # the pipe and handing an empty token to start_instance.sh.
	        shell: bash
	        env:
	          # Passed through the environment rather than interpolated into
	          # the script text.
	          RUNNER_PAT: ${{ secrets.ACTION_RUNNER_PERSONAL_TOKEN }}
	        run: |
	          cd /home/ubuntu/djl_benchmark_script/scripts
	          # -r prints the raw string (no quotes to strip); -e exits non-zero
	          # when the response carries no token.
	          token=$( curl -X POST -H "Authorization: token ${RUNNER_PAT}" \
	            https://api.github.com/repos/deepjavalibrary/djl-serving/actions/runners/registration-token \
	            --fail \
	            | jq -er '.token' )
	          ./start_instance.sh action_inf2 "$token" djl-serving
	      - name: Create new Inf2.24xl instance (2 of 2)
	        id: create_inf2_2
	        shell: bash
	        env:
	          RUNNER_PAT: ${{ secrets.ACTION_RUNNER_PERSONAL_TOKEN }}
	        run: |
	          cd /home/ubuntu/djl_benchmark_script/scripts
	          token=$( curl -X POST -H "Authorization: token ${RUNNER_PAT}" \
	            https://api.github.com/repos/deepjavalibrary/djl-serving/actions/runners/registration-token \
	            --fail \
	            | jq -er '.token' )
	          ./start_instance.sh action_inf2 "$token" djl-serving
	    # NOTE(review): presumably start_instance.sh writes the
	    # `action_inf2_instance_id` step output - confirm against the script.
	    outputs:
	      inf2_instance_id_1: ${{ steps.create_inf2.outputs.action_inf2_instance_id }}
	      inf2_instance_id_2: ${{ steps.create_inf2_2.outputs.action_inf2_instance_id }}

	transformers-neuronx-test-1:
	runs-on: [ self-hosted, inf2 ]
	timeout-minutes: 90
	needs: create-runners
	steps:
	- uses: actions/checkout@v3
	- name: Clean env
	run: \|
	yes \| docker system prune -a --volumes
	sudo rm -rf /home/ubuntu/actions-runner/_work/_tool/Java_Corretto_jdk/
	echo "wait dpkg lock..."
	while sudo fuser /var/{lib/{dpkg,apt/lists},cache/apt/archives}/lock >/dev/null 2>&1; do sleep 5; done
	- name: Set up Python3
	uses: actions/setup-python@v4
	with:
	python-version: '3.10.x'
	- name: Install pip dependencies
	run: pip3 install requests numpy pillow
	- name: Build container name
	run: ./serving/docker/scripts/docker_name_builder.sh pytorch-inf2 ${{ github.event.inputs.djl-version }}
	- name: Download models and dockers
	working-directory: tests/integration
	run: \|
	docker pull deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG
	mkdir logs
	./download_models.sh pytorch-inf2
	- name: Test Pytorch model
	working-directory: tests/integration
	run: \|
	./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models pytorch-inf2-1 \
	serve -m test::PyTorch:nc0=file:/opt/ml/model/resnet18_inf2_2_4.tar.gz
	./test_client.sh image/jpg models/kitten.jpg
	docker rm -f $(docker ps -aq)
	- name: Test Python mode
	working-directory: tests/integration
	run: \|
	./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models pytorch-inf2-1 \
	serve -m test::Python:nc0=file:/opt/ml/model/resnet18_inf2_2_4.tar.gz
	./test_client.sh image/jpg models/kitten.jpg
	docker rm -f $(docker ps -aq)
	- name: Test transformers-neuronx gpt2 with handler
	working-directory: tests/integration
	run: \|
	rm -rf models
	python3 llm/prepare.py transformers_neuronx gpt2
	./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models pytorch-inf2-1 \
	serve
	curl http://127.0.0.1:8080/models
	python3 llm/client.py transformers_neuronx gpt2
	docker rm -f $(docker ps -aq)
	sudo rm -rf models
	- name: Test transformers-neuronx gpt2 quantization with handler
	working-directory: tests/integration
	run: \|
	rm -rf models
	python3 llm/prepare.py transformers_neuronx gpt2-quantize
	./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models pytorch-inf2-1 \
	serve
	curl http://127.0.0.1:8080/models
	python3 llm/client.py transformers_neuronx gpt2-quantize
	docker rm -f $(docker ps -aq)
	sudo rm -rf models
	- name: Test transformers-neuronx opt-1.3b with handler
	working-directory: tests/integration
	run: \|
	rm -rf models
	python3 llm/prepare.py transformers_neuronx opt-1.3b
	./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models pytorch-inf2-6 \
	serve
	curl http://127.0.0.1:8080/models
	python3 llm/client.py transformers_neuronx opt-1.3b
	docker rm -f $(docker ps -aq)
	sudo rm -rf models
	- name: Test transformers-neuronx gpt-j-6b with handler
	working-directory: tests/integration
	run: \|
	rm -rf models
	python3 llm/prepare.py transformers_neuronx gpt-j-6b
	./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models pytorch-inf2-6 \
	serve
	curl http://127.0.0.1:8080/models
	python3 llm/client.py transformers_neuronx gpt-j-6b
	docker rm -f $(docker ps -aq)
	sudo rm -rf models
	- name: Test transformers-neuronx pythia-2.8b with handler
	working-directory: tests/integration
	run: \|
	rm -rf models
	python3 llm/prepare.py transformers_neuronx pythia-2.8b
	./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models pytorch-inf2-2 \
	serve
	curl http://127.0.0.1:8080/models
	python3 llm/client.py transformers_neuronx pythia-2.8b
	docker rm -f $(docker ps -aq)
	sudo rm -rf models
	- name: Test transformers-neuronx bloom-7b1 with handler
	working-directory: tests/integration
	run: \|
	rm -rf models
	python3 llm/prepare.py transformers_neuronx bloom-7b1
	./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models pytorch-inf2-2 \
	serve
	curl http://127.0.0.1:8080/models
	python3 llm/client.py transformers_neuronx bloom-7b1
	docker rm -f $(docker ps -aq)
	sudo rm -rf models
	- name: Test gpt2 partition
	working-directory: tests/integration
	run: \|
	sudo rm -rf models
	python3 llm/prepare.py transformers_neuronx_aot gpt2
	# To test the requirements.txt download.
	echo "dummy_test" >> $PWD/models/test/requirements.txt

	./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models pytorch-inf2-1 \
	partition --model-dir /opt/ml/input/data/training/ --skip-copy \| tee partition_output.log

	# checking if neff files are generated.
	sudo mv $PWD/models/test/partition-test $PWD/models/
	if ls $PWD/models/partition-test/compiled/*.neff &>/dev/null; \
	then echo "compiled files generated"; else exit 1; fi

	# checking whether requirements.txt download is successful
	if grep -F "pip install requirements succeed!" partition_output.log &>/dev/null; \
	then echo "requirements.txt install was successful"; else exit 1; fi
	- name: Test gpt2-quantize partition
	working-directory: tests/integration
	run: \|
	sudo rm -rf models
	python3 llm/prepare.py transformers_neuronx_aot gpt2-quantize
	# To test the requirements.txt download.
	echo "dummy_test" >> $PWD/models/test/requirements.txt

	./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models pytorch-inf2-1 \
	partition --model-dir /opt/ml/input/data/training/ --skip-copy \| tee partition_output.log

	# checking if neff files are generated.
	sudo mv $PWD/models/test/partition-test $PWD/models/
	if ls $PWD/models/partition-test/compiled/*.neff &>/dev/null; \
	then echo "compiled files generated"; else exit 1; fi

	# checking whether requirements.txt download is successful
	if grep -F "pip install requirements succeed!" partition_output.log &>/dev/null; \
	then echo "requirements.txt install was successful"; else exit 1; fi
	- name: On fail step
	if: ${{ failure() }}
	working-directory: tests/integration
	run: \|
	cat logs/serving.log
	- name: Upload test logs
	uses: actions/upload-artifact@v3
	with:
	name: transformers-neuronx-${{ matrix.arch }}-logs
	path: tests/integration/logs/

	transformers-neuronx-test-2:
	runs-on: [ self-hosted, inf2 ]
	timeout-minutes: 90
	needs: create-runners
	steps:
	- uses: actions/checkout@v3
	- name: Clean env
	run: \|
	yes \| docker system prune -a --volumes
	sudo rm -rf /home/ubuntu/actions-runner/_work/_tool/Java_Corretto_jdk/
	echo "wait dpkg lock..."
	while sudo fuser /var/{lib/{dpkg,apt/lists},cache/apt/archives}/lock >/dev/null 2>&1; do sleep 5; done
	- name: Set up Python3
	uses: actions/setup-python@v4
	with:
	python-version: '3.10.x'
	- name: Install pip dependencies
	run: pip3 install requests numpy pillow
	- name: Build container name
	run: ./serving/docker/scripts/docker_name_builder.sh pytorch-inf2 ${{ github.event.inputs.djl-version }}
	- name: Download models and dockers
	working-directory: tests/integration
	run: \|
	docker pull deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG
	mkdir logs
	./download_models.sh pytorch-inf2
	- name: Test transformers-neuronx open-llama-7b with handler
	working-directory: tests/integration
	run: \|
	rm -rf models
	python3 llm/prepare.py transformers_neuronx open-llama-7b
	./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models pytorch-inf2-2 \
	serve
	curl http://127.0.0.1:8080/models
	python3 llm/client.py transformers_neuronx open-llama-7b
	docker rm -f $(docker ps -aq)
	sudo rm -rf models
	- name: Test streaming transformers-neuronx opt-1.3b with handler
	working-directory: tests/integration
	run: \|
	rm -rf models
	python3 llm/prepare.py transformers_neuronx opt-1.3b-streaming
	./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models pytorch-inf2-6 \
	serve
	curl http://127.0.0.1:8080/models
	python3 llm/client.py transformers_neuronx opt-1.3b-streaming
	docker rm -f $(docker ps -aq)
	sudo rm -rf models
	- name: Test stable diffusion 1.5 with handler
	working-directory: tests/integration
	run: \|
	rm -rf models
	python3 llm/prepare.py transformers_neuronx stable-diffusion-1.5-neuron
	./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models pytorch-inf2-2 \
	serve
	curl http://127.0.0.1:8080/models
	python3 llm/client.py neuron-stable-diffusion stable-diffusion-1.5-neuron
	docker rm -f $(docker ps -aq)
	sudo rm -rf models
	- name: Test stable diffusion bf16 with handler
	working-directory: tests/integration
	run: \|
	rm -rf models
	python3 llm/prepare.py transformers_neuronx stable-diffusion-2.1-neuron
	./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models pytorch-inf2-2 \
	serve
	curl http://127.0.0.1:8080/models
	python3 llm/client.py neuron-stable-diffusion stable-diffusion-2.1-neuron
	docker rm -f $(docker ps -aq)
	sudo rm -rf models
	- name: Test stable diffusion xl with handler
	working-directory: tests/integration
	run: \|
	rm -rf models
	python3 llm/prepare.py transformers_neuronx stable-diffusion-xl-neuron
	./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models pytorch-inf2-2 \
	serve
	curl http://127.0.0.1:8080/models
	python3 llm/client.py neuron-stable-diffusion stable-diffusion-xl-neuron
	docker rm -f $(docker ps -aq)
	sudo rm -rf models
	- name: Test load split model with llama2 7B in handler
	working-directory: tests/integration
	run: \|
	rm -rf models
	python3 llm/prepare.py transformers_neuronx llama-7b-split
	./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models pytorch-inf2-2 \
	serve
	curl http://127.0.0.1:8080/models
	python3 llm/client.py transformers_neuronx llama-7b-split
	docker rm -f $(docker ps -aq)
	sudo rm -rf models
	- name: Test load optimum llama2 7B in handler
	working-directory: tests/integration
	run: \|
	rm -rf models
	python3 llm/prepare.py transformers_neuronx llama2-7b
	./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models pytorch-inf2-2 \
	serve
	curl http://127.0.0.1:8080/models
	python3 llm/client.py transformers_neuronx llama2-7b
	docker rm -f $(docker ps -aq)
	sudo rm -rf models
	- name: On fail step
	if: ${{ failure() }}
	working-directory: tests/integration
	run: \|
	cat logs/serving.log
	- name: Upload test logs
	uses: actions/upload-artifact@v3
	with:
	name: transformers-neuronx-${{ matrix.arch }}-logs
	path: tests/integration/logs/

	stop-runners:
	if: always()
	runs-on: [ self-hosted, scheduler ]
	needs: [ create-runners, transformers-neuronx-test-1, transformers-neuronx-test-2 ]
	steps:
	- name: Stop all instances
	run: \|
	cd /home/ubuntu/djl_benchmark_script/scripts
	instance_id=${{ needs.create-runners.outputs.inf2_instance_id_1 }}
	./stop_instance.sh $instance_id
	instance_id=${{ needs.create-runners.outputs.inf2_instance_id_2 }}
	./stop_instance.sh $instance_id

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Inferentia2 integration tests #408

Workflow file

Inferentia2 integration tests #408

Jobs

Run details

Workflow file for this run