Add graph capture and shaders to improve DirectML performance #1351

Workflow file for this run

.github/workflows/linux-gpu-x64-build.yml at 27c834c

	name: "Linux CUDA x64 Build"
	on: [ workflow_dispatch, pull_request ]

	concurrency:
	group: ${{ github.workflow }}-${{ github.head_ref \|\| github.run_id }}
	cancel-in-progress: true

	env:
	ort_dir: "onnxruntime-linux-x64-gpu-1.17.3"
	ort_zip: "onnxruntime-linux-x64-gpu-1.17.3.tgz"
	ort_url: "https://github.com/microsoft/onnxruntime/releases/download/v1.17.3/onnxruntime-linux-x64-gpu-1.17.3.tgz"

	jobs:
	linux-cuda-x64-build:
	env :
	PYTHON_EXECUTABLE: "/opt/python/cp38-cp38/bin/python3.8"
	runs-on: [ "self-hosted", "1ES.Pool=onnxruntime-genai-Ubuntu2004-T4" ]
	steps:
	- name: Checkout OnnxRuntime GenAI repo
	uses: actions/checkout@v4
	with:
	submodules: true
	# We are using the same manylinux repo as the one used in the packaging build
	- name: Checkout ManyLinux repo
	uses: actions/checkout@v4
	with:
	repository: pypa/manylinux
	ref: 5eda9aded5462201e6310105728d33016e637ea7
	clean: true
	path: manylinux
	submodules: true

	- name: Download OnnxRuntime
	run: \|
	curl -L -o ${{ env.ort_zip }} ${{ env.ort_url }}

	- name: Unzip OnnxRuntime
	run: \|
	tar -xzf ${{ env.ort_zip }}
	rm ${{ env.ort_zip }}

	- name: Rename OnnxRuntime to ort
	run: \|
	mv ${{ env.ort_dir }} ort

	- name: Get Docker Image
	run: \|
	set -e -x
	az login --identity --username 63b63039-6328-442f-954b-5a64d124e5b4
	az acr login --name onnxruntimebuildcache --subscription 00c06639-6ee4-454e-8058-8d8b1703bd87
	python3 tools/ci_build/get_docker_image.py --dockerfile tools/ci_build/github/linux/docker/manylinux/Dockerfile.manylinux2_28_cuda \
	--context tools/ci_build/github/linux/docker/manylinux \
	--docker-build-args "--build-arg BUILD_UID=$( id -u )" \
	--container-registry onnxruntimebuildcache \
	--manylinux-src manylinux \
	--multiple_repos \
	--repository onnxruntimecudabuildx64

	- name: Config with Cmake in Docker
	run: \|
	set -e -x
	docker run \
	--gpus all \
	--rm \
	--volume $GITHUB_WORKSPACE:/ort_genai_src \
	-w /ort_genai_src onnxruntimecudabuildx64 \
	bash -c " \
	/usr/bin/cmake --preset linux_gcc_cuda_release \
	-DMANYLINUX=ON \
	-DPYTHON_EXECUTABLE=${{ env.PYTHON_EXECUTABLE }} "

	- name: Build with Cmake in Docker
	run: \|
	set -e -x
	docker run \
	--gpus all \
	--rm \
	--volume $GITHUB_WORKSPACE:/ort_genai_src \
	-w /ort_genai_src onnxruntimecudabuildx64 \
	bash -c " \
	/usr/bin/cmake --build --preset linux_gcc_cuda_release --parallel $( nproc )"

	- name: Get HuggingFace Token
	run: \|
	az login --identity --username 63b63039-6328-442f-954b-5a64d124e5b4
	HF_TOKEN=$(az keyvault secret show --vault-name anubissvcsecret --name ANUBIS-HUGGINGFACE-TOKEN --query value)
	echo "::add-mask::$HF_TOKEN"
	echo "HF_TOKEN=$HF_TOKEN" >> $GITHUB_ENV

	- name: Install the onnxruntime-genai Python wheel and run python test
	run: \|
	echo "Installing the onnxruntime-genai Python wheel and running the Python tests"
	docker run \
	--gpus all \
	--rm \
	--volume $GITHUB_WORKSPACE:/ort_genai_src \
	-e HF_TOKEN=$HF_TOKEN \
	-w /ort_genai_src onnxruntimecudabuildx64 bash -c " \
	${{ env.PYTHON_EXECUTABLE }} -m pip install /ort_genai_src/build/cuda/wheel/onnxruntime_genaimanylinux.whl --user && \
	${{ env.PYTHON_EXECUTABLE }} -m pip install -r test/python/requirements-gpu.txt --user && \
	${{ env.PYTHON_EXECUTABLE }} test/python/test_onnxruntime_genai.py --cwd test/python --test_models test/test_models"

	- name: Docker -- Run unit tests
	run: \|
	echo "Running docker image onnxruntimecudabuildx64"
	docker run \
	--gpus all \
	--rm \
	--volume $GITHUB_WORKSPACE:/ort_genai_src \
	-w /ort_genai_src onnxruntimecudabuildx64 bash -c "/ort_genai_src/build/cuda/test/unit_tests"

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Add graph capture and shaders to improve DirectML performance #1351

Workflow file

Add graph capture and shaders to improve DirectML performance #1351

Jobs

Run details

Workflow file for this run