Attention mask with cuda graph #1279
Workflow file for this run
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
name: "Linux GPU x64 Build" | |
on: [ workflow_dispatch, pull_request ] | |
concurrency: | |
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} | |
cancel-in-progress: true | |
env: | |
ort_dir: "onnxruntime-linux-x64-gpu-1.17.3" | |
ort_zip: "onnxruntime-linux-x64-gpu-1.17.3.tgz" | |
ort_url: "https://github.com/microsoft/onnxruntime/releases/download/v1.17.3/onnxruntime-linux-x64-gpu-1.17.3.tgz" | |
jobs: | |
linux-gpu-x64-build: | |
runs-on: [ "self-hosted", "1ES.Pool=onnxruntime-genai-Ubuntu2004-T4" ] | |
steps: | |
- name: Checkout OnnxRuntime GenAI repo | |
uses: actions/checkout@v4 | |
with: | |
submodules: true | |
- name: Download OnnxRuntime | |
run: | | |
curl -L -o ${{ env.ort_zip }} ${{ env.ort_url }} | |
- name: Unzip OnnxRuntime | |
run: | | |
tar -xzf ${{ env.ort_zip }} | |
rm ${{ env.ort_zip }} | |
- name: Rename OnnxRuntime to ort | |
run: | | |
mv ${{ env.ort_dir }} ort | |
- name: Download Docker Image | |
run: | | |
set -e -x | |
az login --identity --username 63b63039-6328-442f-954b-5a64d124e5b4 | |
az acr login --name onnxruntimebuildcache --subscription 00c06639-6ee4-454e-8058-8d8b1703bd87 | |
python3 tools/ci_build/get_docker_image.py --dockerfile tools/ci_build/github/linux/docker/inference/x64/default/gpu/Dockerfile \ | |
--context tools/ci_build/github/linux/docker/inference/x64/default/gpu \ | |
--docker-build-args "--build-arg BUILD_UID=$( id -u )" \ | |
--container-registry onnxruntimebuildcache \ | |
--repository ort_genai_linux_gpu_gha | |
- name: Print Docker Image Environment Variables | |
run: | | |
echo "Printing docker image environment variables" | |
docker run --rm ort_genai_linux_gpu_gha env | |
- name: Build with Cmake in Docker | |
run: | | |
echo "Running docker image ort_genai_linux_gpu_gha" | |
docker run \ | |
--gpus all \ | |
--rm \ | |
--volume $GITHUB_WORKSPACE:/onnxruntime_src \ | |
-w /onnxruntime_src ort_genai_linux_gpu_gha bash -c "echo $PATH && /usr/bin/cmake -DCMAKE_CUDA_ARCHITECTURES=86 --preset linux_gcc_cuda_release && /usr/bin/cmake --build --preset linux_gcc_cuda_release" | |
- name: Install the onnxruntime-genai Python wheel and run Python tests | |
run: | | |
echo "Installing the onnxruntime-genai Python wheel and running the Python tests" | |
docker run \ | |
--gpus all \ | |
--rm \ | |
--volume $GITHUB_WORKSPACE:/onnxruntime_src \ | |
-w /onnxruntime_src ort_genai_linux_gpu_gha bash -c "python3 -m pip install /onnxruntime_src/build/cuda/wheel/onnxruntime_genai*.whl --user && python3 -m pip install -r test/python/requirements.txt --user && python3 test/python/test_onnxruntime_genai.py --cwd test/python --test_models test/test_models" | |
- name: Docker -- Run tests | |
run: | | |
echo "Running docker image ort_genai_linux_gpu_gha" | |
docker run \ | |
--gpus all \ | |
--rm \ | |
--volume $GITHUB_WORKSPACE:/onnxruntime_src \ | |
-w /onnxruntime_src ort_genai_linux_gpu_gha bash -c "/onnxruntime_src/build/cuda/test/unit_tests" |