Recompute KV cache for Phi3 when switching from short to long factor #3932
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# CI workflow that builds and tests the project for Windows arm64 (CPU only).
name: "Windows CPU arm64 Build"

# Triggers: manual dispatch, pushes to main and release (rel-*) branches,
# and every pull request.
on:
  workflow_dispatch:
  push:
    branches:
      - main
      - rel-*
  pull_request:
# Runs are grouped by workflow name plus the head ref, falling back to the
# run id when no head ref is set. A newer run in the same group cancels the
# one still in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
  cancel-in-progress: true
# Workflow-wide environment variables, read by the steps below through
# $env:... (PowerShell) or ${{ env.... }} (workflow expressions).
env:
  # Build output directory, relative to the workspace root.
  binaryDir: 'build/cpu/win-arm64'
  # Feed query used to discover the ONNX Runtime nightly package version.
  ORT_NIGHTLY_REST_API: "https://feeds.dev.azure.com/aiinfra/PublicPackages/_apis/packaging/Feeds/ORT-Nightly/packages?packageNameQuery=Microsoft.ML.OnnxRuntime&api-version=6.0-preview.1"
  # NuGet package id; also the directory name nuget extracts into (-x).
  ORT_PACKAGE_NAME: "Microsoft.ML.OnnxRuntime"
jobs:
  # Single job: fetch the ONNX Runtime nightly, build with CMake, then run the
  # Python, C#, Java and native unit tests on a self-hosted Windows arm64 pool.
  windows-cpu-arm64-build:
    runs-on: ["self-hosted", "1ES.Pool=onnxruntime-genai-win11-arm64-cpu"]
    steps:
      - name: Checkout OnnxRuntime GenAI repo
        uses: actions/checkout@v4
        with:
          submodules: true

      - name: Setup Visual Studio 2022
        # NOTE(review): the action reference was obfuscated in the captured
        # source ("microsoft/[email protected]"). The inputs below match
        # microsoft/setup-msbuild; confirm the pinned version before merging.
        uses: microsoft/setup-msbuild@v1.1
        with:
          vs-version: '17.4'
          msbuild-architecture: arm64

      - uses: nuget/setup-nuget@v2
        with:
          nuget-version: '5.x'

      - name: Setup Java 21
        uses: actions/setup-java@v4
        with:
          java-version: '21'
          distribution: 'temurin'
          cache: 'gradle'

      - name: Setup Gradle
        uses: gradle/actions/setup-gradle@v3
        with:
          gradle-version: '8.6'

      - name: Download OnnxRuntime Nightly
        shell: powershell
        run: |
          $resp = Invoke-RestMethod "${{ env.ORT_NIGHTLY_REST_API }}"
          $ORT_NIGHTLY_VERSION = $resp.value[0].versions[0].normalizedVersion
          Write-Host "$ORT_NIGHTLY_VERSION"
          # Windows PowerShell does not write UTF-8 by default, so the encoding
          # must be explicit when appending to the GITHUB_ENV file.
          "ORT_NIGHTLY_VERSION=$ORT_NIGHTLY_VERSION" | Out-File -FilePath $env:GITHUB_ENV -Encoding utf8 -Append
          nuget install ${{ env.ORT_PACKAGE_NAME }} -version $ORT_NIGHTLY_VERSION -x -NonInteractive

      # Diagnostic listing only; a failure here must not fail the job.
      - name: List OnnxRuntime package contents
        run: Get-ChildItem ${{ env.ORT_PACKAGE_NAME }} -Recurse
        continue-on-error: true

      - name: Extract OnnxRuntime library and header files
        run: |
          mkdir ort/lib
          move ${{ env.ORT_PACKAGE_NAME }}/build/native/include ort/
          move ${{ env.ORT_PACKAGE_NAME }}/runtimes/win-arm64/native/* ort/lib/

      - name: Configure CMake
        run: |
          python -m pip install wheel requests
          cmake --preset windows_arm64_cpu_release

      - name: Build with CMake
        run: |
          cmake --build --preset windows_arm64_cpu_release --parallel

      - name: Install the Python Wheel and Test Dependencies
        run: |
          python -m pip install "numpy<2" coloredlogs flatbuffers packaging protobuf sympy pytest
          python -m pip install -i https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/ORT-Nightly/pypi/simple/ ort-nightly-qnn
          python -m pip install (Get-ChildItem ("$env:binaryDir\wheel\*.whl")) --no-deps

      - name: Run the Python Tests
        run: |
          python test/python/test_onnxruntime_genai.py --cwd "test\python" --test_models "test\test_models"

      - name: Build the C# API and Run the C# Tests
        run: |
          cd test\csharp
          dotnet test /p:NativeBuildOutputDir="$env:GITHUB_WORKSPACE\$env:binaryDir\Release" /p:OrtLibDir="$env:GITHUB_WORKSPACE\ort\lib"

      - name: Build the Java API and Run the Java Tests
        run: |
          python build.py --config=Release --build_dir $env:binaryDir --build_java --parallel

      # Runs even when earlier steps failed, and never fails the job itself.
      - name: Verify Build Artifacts
        if: always()
        continue-on-error: true
        run: |
          Get-ChildItem -Path $env:GITHUB_WORKSPACE\$env:binaryDir -Recurse
          Get-ChildItem -Path $env:GITHUB_WORKSPACE\$env:binaryDir\test -Recurse

      - name: Run tests
        run: |
          # Copy the ONNX Runtime binaries next to the test executable first.
          copy $env:GITHUB_WORKSPACE\ort\lib\* .\$env:binaryDir\Release
          & .\$env:binaryDir\Release\unit_tests.exe