Skip to content

Recompute KV cache for Phi3 when switching from short to long factor #1675

Recompute KV cache for Phi3 when switching from short to long factor

Recompute KV cache for Phi3 when switching from short to long factor #1675

Workflow file for this run

# Workflow identity, triggers, concurrency policy and shared environment.
name: 'Android x64 Build'

# Triggers: manual dispatch, pushes to `main` or any branch matching `rel-*`,
# and every pull request.
on:
  workflow_dispatch:
  push:
    branches:
      - main
      - 'rel-*'
  pull_request:

# Runs are grouped by workflow name plus the PR head ref; when there is no
# head ref the run id is used instead. A newer run in the same group cancels
# the one still in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
  cancel-in-progress: true

env:
  # REST query against the ORT-Nightly feed, used to look up the newest
  # published version of the ONNX Runtime package.
  ORT_NIGHTLY_REST_API: 'https://feeds.dev.azure.com/aiinfra/PublicPackages/_apis/packaging/Feeds/ORT-Nightly/packages?packageNameQuery=Microsoft.ML.OnnxRuntime&api-version=6.0-preview.1'
  # NuGet package id and the NuGet v3 source it is downloaded from.
  ORT_PACKAGE_NAME: 'Microsoft.ML.OnnxRuntime'
  ORT_NIGHTLY_SOURCE: 'https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/ORT-Nightly/nuget/v3/index.json'
  # Android ABI passed to build.sh. The arm64 alternative is kept for reference:
  # ANDROID_ABI: 'arm64-v8a'
  ANDROID_ABI: 'x86_64'
jobs:
  # Builds for the Android ABI selected by ANDROID_ABI against the latest
  # ONNX Runtime nightly package, then runs the tests in an Android emulator.
  android_x64:
    # Note: Linux is currently the only good option for the Android emulator.
    # It doesn't work on macos-14 (HVF error: HV_UNSUPPORTED).
    # It works on macos-13, but with macos-15 being released soon that isn't
    # a long-term solution.
    runs-on: ubuntu-latest
    steps:
      # A single checkout is enough: none of the setup steps below write into
      # the workspace, so a second checkout would only repeat this one.
      - name: Checkout OnnxRuntime GenAI repo
        uses: actions/checkout@v4
        with:
          submodules: true

      - name: Setup Java 17
        uses: actions/setup-java@v4
        with:
          java-version: '17'
          distribution: 'temurin'
          cache: 'gradle'

      - name: Setup Gradle
        uses: gradle/actions/setup-gradle@v3
        with:
          gradle-version: '8.6'

      # Check the NDK that we're using
      - name: Check Android NDKs
        run: |
          set -e -x
          uname -m
          echo "ANDROID_NDK_HOME=$ANDROID_NDK_HOME"
          echo "ANDROID_NDK_LATEST_HOME=$ANDROID_NDK_LATEST_HOME"
          ls -l "$ANDROID_HOME/ndk"

      # Needed for linux; jq parses the feed response in the version lookup.
      # -y keeps apt-get from waiting on a confirmation prompt in CI.
      - name: Install jq
        run: |
          sudo apt-get install -y jq

      - name: Setup .NET
        uses: actions/setup-dotnet@v4
        with:
          dotnet-version: '8.0.x'

      - name: Get the Latest OnnxRuntime Nightly Version
        run: |
          # pipefail: a curl failure must not be masked by jq's exit status.
          set -e -o pipefail
          ORT_NIGHTLY_VERSION=$(curl -sSf "$ORT_NIGHTLY_REST_API" | jq -r '.value[0].versions[0].normalizedVersion')
          # jq -r prints the literal string "null" when the path is missing.
          if [ -z "$ORT_NIGHTLY_VERSION" ] || [ "$ORT_NIGHTLY_VERSION" = "null" ]; then
            echo "::error::Could not determine the latest ONNX Runtime nightly version"
            exit 1
          fi
          echo "$ORT_NIGHTLY_VERSION"
          # Export the version to all subsequent steps.
          echo "ORT_NIGHTLY_VERSION=$ORT_NIGHTLY_VERSION" >> "$GITHUB_ENV"

      # have to create a dummy project to use `add package`
      # The version comes from a remote API response, so it is read as a quoted
      # shell variable instead of being spliced into the script text.
      - name: Download OnnxRuntime Nightly
        run: |
          set -e -x
          dotnet new console
          dotnet add package "$ORT_PACKAGE_NAME" --version "$ORT_NIGHTLY_VERSION" --source "$ORT_NIGHTLY_SOURCE" --package-directory .

      # The AAR is unzipped into ./ort.
      - name: Extract ONNX Runtime AAR
        run: |
          set -e -x
          ls -l
          unzip "microsoft.ml.onnxruntime/${ORT_NIGHTLY_VERSION}/runtimes/android/native/onnxruntime.aar" -d ort
          ls -lR ort

      # build.sh is invoked three times with the same options, differing only
      # in the final phase flag: --update, --build, then --test.
      - name: Create Android build
        run: |
          set -e -x
          rm -rf build
          ./build.sh --android --android_api=27 \
            --android_ndk_path="${ANDROID_NDK_LATEST_HOME}" \
            --config=RelWithDebInfo --android_abi="${ANDROID_ABI}" \
            --parallel --build_java --update

      - name: Run Android build
        run: |
          set -e -x
          ./build.sh --android --android_api=27 \
            --android_ndk_path="${ANDROID_NDK_LATEST_HOME}" \
            --config=RelWithDebInfo --android_abi="${ANDROID_ABI}" \
            --parallel --build_java --build

      # Installs a udev rule that sets the kvm device to group kvm with mode
      # 0666, then reloads the rules and re-triggers the device.
      - name: Enable KVM group perms so Android emulator can run
        run: |
          echo 'KERNEL=="kvm", GROUP="kvm", MODE="0666", OPTIONS+="static_node=kvm"' | sudo tee /etc/udev/rules.d/99-kvm4all.rules
          sudo udevadm control --reload-rules
          sudo udevadm trigger --name-match=kvm

      - name: Run Android tests
        run: |
          set -e -x
          ./build.sh --android --android_api=27 \
            --android_ndk_path="${ANDROID_NDK_LATEST_HOME}" \
            --config=RelWithDebInfo --android_abi="${ANDROID_ABI}" \
            --parallel --build_java --android_run_emulator --test