Recompute KV cache for Phi3 when switching from short to long factor #1675
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Workflow header for the Android x86_64 build: triggers, run concurrency and
# the workflow-level environment shared by every step.
name: "Android x64 Build"

# Runs on manual dispatch, on pushes to main and rel-* branches, and on pull
# requests.
on:
  workflow_dispatch:
  push:
    branches:
      - main
      - rel-*
  pull_request:

# Runs that share a group cancel each other. The group is the workflow name
# plus the pull request head ref; when head_ref is empty the unique run id is
# used instead, so such runs do not share a group.
concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
  cancel-in-progress: true

env:
  # REST endpoint queried for the available ORT-Nightly package versions.
  ORT_NIGHTLY_REST_API: "https://feeds.dev.azure.com/aiinfra/PublicPackages/_apis/packaging/Feeds/ORT-Nightly/packages?packageNameQuery=Microsoft.ML.OnnxRuntime&api-version=6.0-preview.1"
  # NuGet package id of the ONNX Runtime nightly build.
  ORT_PACKAGE_NAME: "Microsoft.ML.OnnxRuntime"
  # NuGet v3 feed the nightly package is downloaded from.
  ORT_NIGHTLY_SOURCE: "https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/ORT-Nightly/nuget/v3/index.json"
  # ABI passed to build.sh via --android_abi; the arm64 value is kept for reference.
  # ANDROID_ABI: "arm64-v8a"
  ANDROID_ABI: "x86_64"
jobs:
  # Builds the Android package against the latest ONNX Runtime nightly and
  # runs the tests with --android_run_emulator.
  android_x64:
    # Note: linux is the only good option for the Android emulator currently.
    #   it doesn't work on macos-14.
    #     HVF error: HV_UNSUPPORTED
    #   it works on macos-13 but with macos-15 being released soon that isn't
    #   a long term solution.
    runs-on: ubuntu-latest
    steps:
      # Checked out once only; the job previously repeated this step verbatim
      # after the .NET setup.
      - name: Checkout OnnxRuntime GenAI repo
        uses: actions/checkout@v4
        with:
          submodules: true

      - name: Setup Java 17
        uses: actions/setup-java@v4
        with:
          java-version: '17'
          distribution: 'temurin'
          cache: 'gradle'

      - name: Setup Gradle
        uses: gradle/actions/setup-gradle@v3
        with:
          gradle-version: '8.6'

      # Check the NDK that we're using
      - name: Check Android NDKs
        run: |
          set -e -x
          uname -m
          echo "ANDROID_NDK_HOME=$ANDROID_NDK_HOME"
          echo "ANDROID_NDK_LATEST_HOME=$ANDROID_NDK_LATEST_HOME"
          ls -l "$ANDROID_HOME/ndk"

      # Needed for linux
      - name: Install jq
        run: |
          set -e -x
          # -y keeps the install non-interactive; there is no terminal to
          # answer a confirmation prompt on a CI runner.
          sudo apt-get install -y jq

      - name: Setup .NET 8
        uses: actions/setup-dotnet@v4
        with:
          dotnet-version: '8.0.x'

      # Queries the nightly feed and exports the newest version to later steps
      # through GITHUB_ENV.
      - name: Get the Latest OnnxRuntime Nightly Version
        run: |
          # pipefail makes a curl failure fail the step instead of being
          # masked by jq's exit status.
          set -e -o pipefail
          ORT_NIGHTLY_VERSION=$(curl -sSf "$ORT_NIGHTLY_REST_API" | jq -r '.value[0].versions[0].normalizedVersion')
          # jq -r prints the literal string "null" when the path is missing.
          if [ -z "$ORT_NIGHTLY_VERSION" ] || [ "$ORT_NIGHTLY_VERSION" = "null" ]; then
            echo "Failed to determine the latest OnnxRuntime nightly version" >&2
            exit 1
          fi
          echo "$ORT_NIGHTLY_VERSION"
          echo "ORT_NIGHTLY_VERSION=$ORT_NIGHTLY_VERSION" >> "$GITHUB_ENV"

      # have to create a dummy project to use `add package`
      # The version string comes from a remote API, so it is passed as a
      # quoted shell variable rather than expanded into the script text.
      - name: Download OnnxRuntime Nightly
        run: |
          set -e -x
          dotnet new console
          dotnet add package "$ORT_PACKAGE_NAME" \
            --version "$ORT_NIGHTLY_VERSION" \
            --source "$ORT_NIGHTLY_SOURCE" \
            --package-directory .

      # Unpacks the AAR from the downloaded NuGet package into ./ort.
      - name: Extract ONNX Runtime AAR
        run: |
          set -e -x
          ls -l
          unzip "microsoft.ml.onnxruntime/${ORT_NIGHTLY_VERSION}/runtimes/android/native/onnxruntime.aar" -d ort
          ls -lR ort

      # The three build.sh invocations below share the same arguments and
      # differ only in the final phase flag (--update, --build, --test).
      # NOTE(review): presumably these are the configure, compile and test
      # phases of build.sh; confirm against the script.
      - name: Create Android build
        run: |
          set -e -x
          rm -rf build
          ./build.sh --android --android_api=27 \
            --android_ndk_path="${ANDROID_NDK_LATEST_HOME}" \
            --config=RelWithDebInfo --android_abi="${ANDROID_ABI}" \
            --parallel --build_java --update

      - name: Run Android build
        run: |
          set -e -x
          ./build.sh --android --android_api=27 \
            --android_ndk_path="${ANDROID_NDK_LATEST_HOME}" \
            --config=RelWithDebInfo --android_abi="${ANDROID_ABI}" \
            --parallel --build_java --build

      # Installs a udev rule that sets the kvm device to group "kvm" with
      # mode 0666, then reloads and re-triggers udev so it takes effect.
      - name: Enable KVM group perms so Android emulator can run
        run: |
          echo 'KERNEL=="kvm", GROUP="kvm", MODE="0666", OPTIONS+="static_node=kvm"' | sudo tee /etc/udev/rules.d/99-kvm4all.rules
          sudo udevadm control --reload-rules
          sudo udevadm trigger --name-match=kvm

      - name: Run Android tests
        run: |
          set -e -x
          ./build.sh --android --android_api=27 \
            --android_ndk_path="${ANDROID_NDK_LATEST_HOME}" \
            --config=RelWithDebInfo --android_abi="${ANDROID_ABI}" \
            --parallel --build_java --android_run_emulator --test