From f4278a96cd032990de24fc64a52493ab77a510af Mon Sep 17 00:00:00 2001
From: Chen Lai
Date: Fri, 22 Nov 2024 14:23:21 -0800
Subject: [PATCH] Add qnn 16a16w quantization test (#7039)

Summary:
Pull Request resolved: https://github.com/pytorch/executorch/pull/7039

Differential Revision: D66390212
---
 .ci/scripts/test_llama.sh   |   8 +
 .github/workflows/pull.yml  | 834 ++++++++++++++++++------------------
 .github/workflows/trunk.yml | 782 ++++++++++++++++-----------------
 3 files changed, 817 insertions(+), 807 deletions(-)

diff --git a/.ci/scripts/test_llama.sh b/.ci/scripts/test_llama.sh
index dad3e1101f..bfb21a9880 100644
--- a/.ci/scripts/test_llama.sh
+++ b/.ci/scripts/test_llama.sh
@@ -27,6 +27,10 @@ while [[ $# -gt 0 ]]; do
       MODE="$2" # portable or xnnpack+custom or xnnpack+custom+qe
       shift 2
       ;;
+    -pt2e_quantize)
+      PT2E_QUANTIZE="$2" # pt2e quantization scheme, e.g. qnn_16a16w
+      shift 2
+      ;;
     -upload)
       UPLOAD_DIR="$2"
       shift 2
       ;;
@@ -234,6 +238,10 @@ if [[ "${COREML}" == "ON" ]]; then
 fi
 if [[ "${QNN}" == "ON" ]]; then
   EXPORT_ARGS="${EXPORT_ARGS} -kv -v --qnn --disable_dynamic_shape"
+  echo "PT2E_QUANTIZE is ${PT2E_QUANTIZE}"
+  if [[ "${PT2E_QUANTIZE}" == "qnn_16a16w" ]]; then
+    EXPORT_ARGS+=" --tokenizer_path tokenizer.model --pt2e_quantize qnn_16a16w --calibration_tasks wikitext --calibration_limit 1 --calibration_seq_length 128 --calibration_data Once "
+  fi
 fi
 # Add dynamically linked library location
 $PYTHON_EXECUTABLE -m examples.models.llama.export_llama ${EXPORT_ARGS}
diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml
index 6fc8ca9185..a66400d600 100644
--- a/.github/workflows/pull.yml
+++ b/.github/workflows/pull.yml
@@ -53,313 +53,313 @@ jobs:
       # Build and test ExecuTorch with the add model on portable backend.
       PYTHON_EXECUTABLE=python bash .ci/scripts/test_model.sh "add" "${BUILD_TOOL}" "portable"
 
-  test-models-linux:
-    name: test-models-linux
-    uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
-    needs: gather-models
-    strategy:
-      matrix: ${{ fromJSON(needs.gather-models.outputs.models) }}
-      fail-fast: false
-    with:
-      runner: ${{ matrix.runner }}
-      docker-image: executorch-ubuntu-22.04-clang12
-      submodules: 'true'
-      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
-      timeout: ${{ matrix.timeout }}
-      script: |
-        # The generic Linux job chooses to use base env, not the one setup by the image
-        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
-        conda activate "${CONDA_ENV}"
-
-        MODEL_NAME=${{ matrix.model }}
-        BUILD_TOOL=${{ matrix.build-tool }}
-        BACKEND=${{ matrix.backend }}
-        DEMO_BACKEND_DELEGATION=${{ matrix.demo_backend_delegation }}
-
-        PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "${BUILD_TOOL}"
-        # Build and test ExecuTorch
-        PYTHON_EXECUTABLE=python bash .ci/scripts/test_model.sh "${MODEL_NAME}" "${BUILD_TOOL}" "${BACKEND}" "${DEMO_BACKEND_DELEGATION}"
-
-  test-llama-runner-linux:
-    name: test-llama-runner-linux
-    uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
-    strategy:
-      matrix:
-        dtype: [fp32]
-        mode: [portable, xnnpack+custom, xnnpack+custom+qe]
-        include:
-          - dtype: bf16
-            mode: portable
-          - dtype: bf16
-            mode: custom
-      fail-fast: false
-    with:
-      runner: linux.2xlarge
-      docker-image: executorch-ubuntu-22.04-clang12
-      submodules: 'true'
-      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
-      timeout: 900
-      upload-artifact: android-models
-      upload-artifact-to-s3: true
-      script: |
-        # The generic Linux job 
chooses to use base env, not the one setup by the image - CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]") - conda activate "${CONDA_ENV}" - - DTYPE=${{ matrix.dtype }} - BUILD_TOOL="cmake" - MODE=${{ matrix.mode }} - ARTIFACTS_DIR_NAME="artifacts-to-be-uploaded/${DTYPE}-${MODE}" - ARTIFACTS_DIR_NAME="${ARTIFACTS_DIR_NAME/+/-}" - - # Setup executorch - PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "${BUILD_TOOL}" - # Install requirements for export_llama - PYTHON_EXECUTABLE=python bash examples/models/llama/install_requirements.sh - # Test llama2 - PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh -model stories110M -build_tool "${BUILD_TOOL}" -dtype "${DTYPE}" -mode "${MODE}" -upload "${ARTIFACTS_DIR_NAME}" - - test-llama-runner-linux-android: - name: test-llama-runner-linux-android - uses: pytorch/test-infra/.github/workflows/linux_job.yml@main - strategy: - fail-fast: false - with: - runner: linux.2xlarge - docker-image: executorch-ubuntu-22.04-clang12-android - submodules: 'true' - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - timeout: 90 - script: | - # The generic Linux job chooses to use base env, not the one setup by the image - CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]") - conda activate "${CONDA_ENV}" - - BUILD_TOOL="cmake" - PYTHON_EXECUTABLE=python \ - bash .ci/scripts/build_llama_android.sh "${BUILD_TOOL}" - - test-custom-ops-linux: - name: test-custom-ops-linux - uses: pytorch/test-infra/.github/workflows/linux_job.yml@main - strategy: - fail-fast: false - with: - runner: linux.2xlarge - docker-image: executorch-ubuntu-22.04-clang12 - submodules: 'true' - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - timeout: 90 - script: | - # The generic Linux job chooses to use base env, not the one setup by the image - CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]") - conda activate "${CONDA_ENV}" - - BUILD_TOOL="cmake" - PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "${BUILD_TOOL}" - # Test custom ops - PYTHON_EXECUTABLE=python bash examples/portable/custom_ops/test_custom_ops.sh "${BUILD_TOOL}" - - test-selective-build-linux: - name: test-selective-build-linux - uses: pytorch/test-infra/.github/workflows/linux_job.yml@main - strategy: - fail-fast: false - with: - runner: linux.2xlarge - docker-image: executorch-ubuntu-22.04-clang12 - submodules: 'true' - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - timeout: 90 - script: | - # The generic Linux job chooses to use base env, not the one setup by the image - CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]") - conda activate "${CONDA_ENV}" - - BUILD_TOOL="cmake" - PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "${BUILD_TOOL}" - # Test selective build - PYTHON_EXECUTABLE=python bash examples/selective_build/test_selective_build.sh "${BUILD_TOOL}" - - test-llava-runner-linux: - name: test-llava-runner-linux - uses: pytorch/test-infra/.github/workflows/linux_job.yml@main - strategy: - fail-fast: false - with: - runner: linux.24xlarge - docker-image: executorch-ubuntu-22.04-clang12 - submodules: 'true' - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - timeout: 90 - script: | - # The generic Linux job chooses to use base env, not the one setup by the image - CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]") - conda activate 
"${CONDA_ENV}" - - PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "cmake" - - # install pybind - bash install_requirements.sh --pybind xnnpack - - # install Llava requirements - bash examples/models/llama/install_requirements.sh - bash examples/models/llava/install_requirements.sh - - # run python unittest - python -m unittest examples.models.llava.test.test_llava - - # run e2e (export, tokenizer and runner) - PYTHON_EXECUTABLE=python bash .ci/scripts/test_llava.sh - - test-quantized-aot-lib-linux: - name: test-quantized-aot-lib-linux - uses: pytorch/test-infra/.github/workflows/linux_job.yml@main - strategy: - fail-fast: false - with: - runner: linux.2xlarge - docker-image: executorch-ubuntu-22.04-clang12 - submodules: 'true' - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - timeout: 90 - script: | - # The generic Linux job chooses to use base env, not the one setup by the image - CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]") - conda activate "${CONDA_ENV}" - - BUILD_TOOL="cmake" - PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "${BUILD_TOOL}" - PYTHON_EXECUTABLE=python bash examples/xnnpack/quantization/test_quantize.sh "${BUILD_TOOL}" mv2 - - test-pybind-build-linux: - name: test-pybind-build-linux - uses: pytorch/test-infra/.github/workflows/linux_job.yml@main - strategy: - fail-fast: false - with: - runner: linux.2xlarge - docker-image: executorch-ubuntu-22.04-clang12 - submodules: 'true' - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - timeout: 90 - script: | - # The generic Linux job chooses to use base env, not the one setup by the image - CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]") - conda activate "${CONDA_ENV}" - - # build module for executorch.extension.pybindings.portable_lib - BUILD_TOOL="cmake" - PYTHON_EXECUTABLE=python \ - EXECUTORCH_BUILD_XNNPACK=ON \ - EXECUTORCH_BUILD_PYBIND=ON \ - bash .ci/scripts/setup-linux.sh "${BUILD_TOOL}" - - # see if we can import the module successfully - python -c "from executorch.extension.pybindings import portable_lib; print('success!')" - - test-binary-size-linux-gcc: - name: test-binary-size-linux-gcc - uses: pytorch/test-infra/.github/workflows/linux_job.yml@main - strategy: - fail-fast: false - with: - runner: linux.2xlarge - docker-image: executorch-ubuntu-22.04-gcc9 - submodules: 'true' - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - timeout: 90 - script: | - # The generic Linux job chooses to use base env, not the one setup by the image - CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]") - conda activate "${CONDA_ENV}" - - # build module for executorch.extension.pybindings.portable_lib - bash test/build_size_test.sh - strip cmake-out/test/size_test - output=$(ls -la cmake-out/test/size_test) - arr=($output) - size=${arr[4]} - # threshold=48120 on devserver with gcc11.4 - # todo(lfq): update once binary size is below 50kb. 
- threshold="51504" - if [[ "$size" -le "$threshold" ]]; then - echo "Success $size <= $threshold" - else - echo "Fail $size > $threshold" - exit 1 - fi - - test-binary-size-linux: - name: test-binary-size-linux - uses: pytorch/test-infra/.github/workflows/linux_job.yml@main - strategy: - fail-fast: false - with: - runner: linux.2xlarge - docker-image: executorch-ubuntu-22.04-clang12 - submodules: 'true' - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - timeout: 90 - script: | - # The generic Linux job chooses to use base env, not the one setup by the image - CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]") - conda activate "${CONDA_ENV}" - - # build module for executorch.extension.pybindings.portable_lib - bash test/build_size_test.sh - strip cmake-out/test/size_test - output=$(ls -la cmake-out/test/size_test) - arr=($output) - size=${arr[4]} - # threshold=48120 on devserver with gcc11.4 - # todo(lfq): update once binary size is below 50kb. - threshold="51784" - if [[ "$size" -le "$threshold" ]]; then - echo "Success $size <= $threshold" - else - echo "Fail $size > $threshold" - exit 1 - fi - - android: - uses: ./.github/workflows/_android.yml - needs: test-llama-runner-linux - - unittest: - uses: ./.github/workflows/_unittest.yml - with: - docker-image: executorch-ubuntu-22.04-clang12 - - unittest-arm: - uses: pytorch/test-infra/.github/workflows/linux_job.yml@main - with: - runner: linux.2xlarge - docker-image: executorch-ubuntu-22.04-arm-sdk - submodules: 'true' - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - timeout: 90 - script: | - set -eux - - # The generic Linux job chooses to use base env, not the one setup by the image - CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]") - conda activate "${CONDA_ENV}" - - BUILD_TOOL="cmake" - - # Setup MacOS dependencies as there is no Docker support on MacOS atm - PYTHON_EXECUTABLE=python \ - EXECUTORCH_BUILD_PYBIND=ON \ - EXECUTORCH_BUILD_ARM_BAREMETAL=ON \ - .ci/scripts/setup-linux.sh "${BUILD_TOOL}" - - source .ci/scripts/utils.sh - # Install Arm dependencies - install_arm - - # Run pytest with coverage - pytest -c /dev/null -v -n auto --cov=./ --cov-report=xml backends/arm/test + # test-models-linux: + # name: test-models-linux + # uses: pytorch/test-infra/.github/workflows/linux_job.yml@main + # needs: gather-models + # strategy: + # matrix: ${{ fromJSON(needs.gather-models.outputs.models) }} + # fail-fast: false + # with: + # runner: ${{ matrix.runner }} + # docker-image: executorch-ubuntu-22.04-clang12 + # submodules: 'true' + # ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} + # timeout: ${{ matrix.timeout }} + # script: | + # # The generic Linux job chooses to use base env, not the one setup by the image + # CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]") + # conda activate "${CONDA_ENV}" + + # MODEL_NAME=${{ matrix.model }} + # BUILD_TOOL=${{ matrix.build-tool }} + # BACKEND=${{ matrix.backend }} + # DEMO_BACKEND_DELEGATION=${{ matrix.demo_backend_delegation }} + + # PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "${BUILD_TOOL}" + # # Build and test ExecuTorch + # PYTHON_EXECUTABLE=python bash .ci/scripts/test_model.sh "${MODEL_NAME}" "${BUILD_TOOL}" "${BACKEND}" "${DEMO_BACKEND_DELEGATION}" + + # test-llama-runner-linux: + # name: test-llama-runner-linux + # uses: pytorch/test-infra/.github/workflows/linux_job.yml@main + # strategy: + 
# matrix: + # dtype: [fp32] + # mode: [portable, xnnpack+custom, xnnpack+custom+qe] + # include: + # - dtype: bf16 + # mode: portable + # - dtype: bf16 + # mode: custom + # fail-fast: false + # with: + # runner: linux.2xlarge + # docker-image: executorch-ubuntu-22.04-clang12 + # submodules: 'true' + # ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} + # timeout: 900 + # upload-artifact: android-models + # upload-artifact-to-s3: true + # script: | + # # The generic Linux job chooses to use base env, not the one setup by the image + # CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]") + # conda activate "${CONDA_ENV}" + + # DTYPE=${{ matrix.dtype }} + # BUILD_TOOL="cmake" + # MODE=${{ matrix.mode }} + # ARTIFACTS_DIR_NAME="artifacts-to-be-uploaded/${DTYPE}-${MODE}" + # ARTIFACTS_DIR_NAME="${ARTIFACTS_DIR_NAME/+/-}" + + # # Setup executorch + # PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "${BUILD_TOOL}" + # # Install requirements for export_llama + # PYTHON_EXECUTABLE=python bash examples/models/llama/install_requirements.sh + # # Test llama2 + # PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh -model stories110M -build_tool "${BUILD_TOOL}" -dtype "${DTYPE}" -mode "${MODE}" -upload "${ARTIFACTS_DIR_NAME}" + + # test-llama-runner-linux-android: + # name: test-llama-runner-linux-android + # uses: pytorch/test-infra/.github/workflows/linux_job.yml@main + # strategy: + # fail-fast: false + # with: + # runner: linux.2xlarge + # docker-image: executorch-ubuntu-22.04-clang12-android + # submodules: 'true' + # ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} + # timeout: 90 + # script: | + # # The generic Linux job chooses to use base env, not the one setup by the image + # CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]") + # conda activate "${CONDA_ENV}" + + # BUILD_TOOL="cmake" + # PYTHON_EXECUTABLE=python \ + # bash .ci/scripts/build_llama_android.sh "${BUILD_TOOL}" + + # test-custom-ops-linux: + # name: test-custom-ops-linux + # uses: pytorch/test-infra/.github/workflows/linux_job.yml@main + # strategy: + # fail-fast: false + # with: + # runner: linux.2xlarge + # docker-image: executorch-ubuntu-22.04-clang12 + # submodules: 'true' + # ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} + # timeout: 90 + # script: | + # # The generic Linux job chooses to use base env, not the one setup by the image + # CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]") + # conda activate "${CONDA_ENV}" + + # BUILD_TOOL="cmake" + # PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "${BUILD_TOOL}" + # # Test custom ops + # PYTHON_EXECUTABLE=python bash examples/portable/custom_ops/test_custom_ops.sh "${BUILD_TOOL}" + + # test-selective-build-linux: + # name: test-selective-build-linux + # uses: pytorch/test-infra/.github/workflows/linux_job.yml@main + # strategy: + # fail-fast: false + # with: + # runner: linux.2xlarge + # docker-image: executorch-ubuntu-22.04-clang12 + # submodules: 'true' + # ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} + # timeout: 90 + # script: | + # # The generic Linux job chooses to use base env, not the one setup by the image + # CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]") + # conda activate "${CONDA_ENV}" + + # BUILD_TOOL="cmake" + # PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "${BUILD_TOOL}" + # # Test 
selective build + # PYTHON_EXECUTABLE=python bash examples/selective_build/test_selective_build.sh "${BUILD_TOOL}" + + # test-llava-runner-linux: + # name: test-llava-runner-linux + # uses: pytorch/test-infra/.github/workflows/linux_job.yml@main + # strategy: + # fail-fast: false + # with: + # runner: linux.24xlarge + # docker-image: executorch-ubuntu-22.04-clang12 + # submodules: 'true' + # ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} + # timeout: 90 + # script: | + # # The generic Linux job chooses to use base env, not the one setup by the image + # CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]") + # conda activate "${CONDA_ENV}" + + # PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "cmake" + + # # install pybind + # bash install_requirements.sh --pybind xnnpack + + # # install Llava requirements + # bash examples/models/llama/install_requirements.sh + # bash examples/models/llava/install_requirements.sh + + # # run python unittest + # python -m unittest examples.models.llava.test.test_llava + + # # run e2e (export, tokenizer and runner) + # PYTHON_EXECUTABLE=python bash .ci/scripts/test_llava.sh + + # test-quantized-aot-lib-linux: + # name: test-quantized-aot-lib-linux + # uses: pytorch/test-infra/.github/workflows/linux_job.yml@main + # strategy: + # fail-fast: false + # with: + # runner: linux.2xlarge + # docker-image: executorch-ubuntu-22.04-clang12 + # submodules: 'true' + # ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} + # timeout: 90 + # script: | + # # The generic Linux job chooses to use base env, not the one setup by the image + # CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]") + # conda activate "${CONDA_ENV}" + + # BUILD_TOOL="cmake" + # PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "${BUILD_TOOL}" + # PYTHON_EXECUTABLE=python bash examples/xnnpack/quantization/test_quantize.sh "${BUILD_TOOL}" mv2 + + # test-pybind-build-linux: + # name: test-pybind-build-linux + # uses: pytorch/test-infra/.github/workflows/linux_job.yml@main + # strategy: + # fail-fast: false + # with: + # runner: linux.2xlarge + # docker-image: executorch-ubuntu-22.04-clang12 + # submodules: 'true' + # ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} + # timeout: 90 + # script: | + # # The generic Linux job chooses to use base env, not the one setup by the image + # CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]") + # conda activate "${CONDA_ENV}" + + # # build module for executorch.extension.pybindings.portable_lib + # BUILD_TOOL="cmake" + # PYTHON_EXECUTABLE=python \ + # EXECUTORCH_BUILD_XNNPACK=ON \ + # EXECUTORCH_BUILD_PYBIND=ON \ + # bash .ci/scripts/setup-linux.sh "${BUILD_TOOL}" + + # # see if we can import the module successfully + # python -c "from executorch.extension.pybindings import portable_lib; print('success!')" + + # test-binary-size-linux-gcc: + # name: test-binary-size-linux-gcc + # uses: pytorch/test-infra/.github/workflows/linux_job.yml@main + # strategy: + # fail-fast: false + # with: + # runner: linux.2xlarge + # docker-image: executorch-ubuntu-22.04-gcc9 + # submodules: 'true' + # ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} + # timeout: 90 + # script: | + # # The generic Linux job chooses to use base env, not the one setup by the image + # CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]") + # conda activate 
"${CONDA_ENV}" + + # # build module for executorch.extension.pybindings.portable_lib + # bash test/build_size_test.sh + # strip cmake-out/test/size_test + # output=$(ls -la cmake-out/test/size_test) + # arr=($output) + # size=${arr[4]} + # # threshold=48120 on devserver with gcc11.4 + # # todo(lfq): update once binary size is below 50kb. + # threshold="51504" + # if [[ "$size" -le "$threshold" ]]; then + # echo "Success $size <= $threshold" + # else + # echo "Fail $size > $threshold" + # exit 1 + # fi + + # test-binary-size-linux: + # name: test-binary-size-linux + # uses: pytorch/test-infra/.github/workflows/linux_job.yml@main + # strategy: + # fail-fast: false + # with: + # runner: linux.2xlarge + # docker-image: executorch-ubuntu-22.04-clang12 + # submodules: 'true' + # ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} + # timeout: 90 + # script: | + # # The generic Linux job chooses to use base env, not the one setup by the image + # CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]") + # conda activate "${CONDA_ENV}" + + # # build module for executorch.extension.pybindings.portable_lib + # bash test/build_size_test.sh + # strip cmake-out/test/size_test + # output=$(ls -la cmake-out/test/size_test) + # arr=($output) + # size=${arr[4]} + # # threshold=48120 on devserver with gcc11.4 + # # todo(lfq): update once binary size is below 50kb. + # threshold="51784" + # if [[ "$size" -le "$threshold" ]]; then + # echo "Success $size <= $threshold" + # else + # echo "Fail $size > $threshold" + # exit 1 + # fi + + # android: + # uses: ./.github/workflows/_android.yml + # needs: test-llama-runner-linux + + # unittest: + # uses: ./.github/workflows/_unittest.yml + # with: + # docker-image: executorch-ubuntu-22.04-clang12 + + # unittest-arm: + # uses: pytorch/test-infra/.github/workflows/linux_job.yml@main + # with: + # runner: linux.2xlarge + # docker-image: executorch-ubuntu-22.04-arm-sdk + # submodules: 'true' + # ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} + # timeout: 90 + # script: | + # set -eux + + # # The generic Linux job chooses to use base env, not the one setup by the image + # CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]") + # conda activate "${CONDA_ENV}" + + # BUILD_TOOL="cmake" + + # # Setup MacOS dependencies as there is no Docker support on MacOS atm + # PYTHON_EXECUTABLE=python \ + # EXECUTORCH_BUILD_PYBIND=ON \ + # EXECUTORCH_BUILD_ARM_BAREMETAL=ON \ + # .ci/scripts/setup-linux.sh "${BUILD_TOOL}" + + # source .ci/scripts/utils.sh + # # Install Arm dependencies + # install_arm + + # # Run pytest with coverage + # pytest -c /dev/null -v -n auto --cov=./ --cov-report=xml backends/arm/test test-llama-runner-qnn-linux: @@ -368,6 +368,7 @@ jobs: strategy: matrix: dtype: [fp32] + pt2e_quantize: [qnn_16a16w] mode: [qnn] fail-fast: false with: @@ -384,6 +385,7 @@ jobs: DTYPE=${{ matrix.dtype }} BUILD_TOOL="cmake" MODE=${{ matrix.mode }} + PT2E_QUANTIZE=${{ matrix.pt2e_quantize }} PYTHON_EXECUTABLE=python bash .ci/scripts/setup-qnn-deps.sh PYTHON_EXECUTABLE=python bash .ci/scripts/build-qnn-sdk.sh @@ -393,112 +395,112 @@ jobs: # Install requirements for export_llama PYTHON_EXECUTABLE=python bash examples/models/llama/install_requirements.sh # Test llama2 - PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh -model stories110M -build_tool "${BUILD_TOOL}" -dtype "${DTYPE}" -mode "${MODE}" - - test-phi-3-mini-runner-linux: - name: test-phi-3-mini-runner-linux - 
uses: pytorch/test-infra/.github/workflows/linux_job.yml@main - strategy: - fail-fast: false - with: - runner: linux.24xlarge - docker-image: executorch-ubuntu-22.04-clang12 - submodules: 'true' - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - timeout: 90 - script: | - # The generic Linux job chooses to use base env, not the one setup by the image - CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]") - conda activate "${CONDA_ENV}" - - PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "cmake" - - # install pybind - bash install_requirements.sh --pybind xnnpack - - # install phi-3-mini requirements - bash examples/models/phi-3-mini/install_requirements.sh - - # run e2e (export, tokenizer and runner) - PYTHON_EXECUTABLE=python bash .ci/scripts/test_phi_3_mini.sh - - test-eval_llama-wikitext-linux: - name: test-eval_llama-wikitext-linux - uses: pytorch/test-infra/.github/workflows/linux_job.yml@main - strategy: - fail-fast: false - with: - runner: linux.24xlarge - docker-image: executorch-ubuntu-22.04-clang12 - submodules: 'true' - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - timeout: 90 - script: | - # The generic Linux job chooses to use base env, not the one setup by the image - CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]") - conda activate "${CONDA_ENV}" - - PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "cmake" - - # install pybind - bash install_requirements.sh --pybind xnnpack - - # install llama requirements - bash examples/models/llama/install_requirements.sh - - # run eval_llama wikitext task - PYTHON_EXECUTABLE=python bash .ci/scripts/test_eval_llama_wikitext.sh - - test-eval_llama-mmlu-linux: - name: test-eval_llama-mmlu-linux - uses: pytorch/test-infra/.github/workflows/linux_job.yml@main - strategy: - fail-fast: false - with: - runner: linux.24xlarge - docker-image: executorch-ubuntu-22.04-clang12 - submodules: 'true' - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - timeout: 90 - script: | - # The generic Linux job chooses to use base env, not the one setup by the image - CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]") - conda activate "${CONDA_ENV}" - - PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "cmake" - - # install pybind - bash install_requirements.sh --pybind xnnpack - - # install llama requirements - bash examples/models/llama/install_requirements.sh - - # run eval_llama mmlu task - PYTHON_EXECUTABLE=python bash .ci/scripts/test_eval_llama_mmlu.sh - - test-llama_runner_eager-linux: - name: test-llama_runner_eager-linux - uses: pytorch/test-infra/.github/workflows/linux_job.yml@main - strategy: - fail-fast: false - with: - runner: linux.24xlarge - docker-image: executorch-ubuntu-22.04-clang12 - submodules: 'true' - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - timeout: 90 - script: | - # The generic Linux job chooses to use base env, not the one setup by the image - CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]") - conda activate "${CONDA_ENV}" - - PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "cmake" - - # install pybind - bash install_requirements.sh --pybind xnnpack - - # install llama requirements - bash examples/models/llama/install_requirements.sh - - # run llama runner in eager mode - PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama_runner_eager.sh + 
PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh -model stories110M -build_tool "${BUILD_TOOL}" -dtype "${DTYPE}" -mode "${MODE}" -pt2e_quantize "${PT2E_QUANTIZE}" + + # test-phi-3-mini-runner-linux: + # name: test-phi-3-mini-runner-linux + # uses: pytorch/test-infra/.github/workflows/linux_job.yml@main + # strategy: + # fail-fast: false + # with: + # runner: linux.24xlarge + # docker-image: executorch-ubuntu-22.04-clang12 + # submodules: 'true' + # ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} + # timeout: 90 + # script: | + # # The generic Linux job chooses to use base env, not the one setup by the image + # CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]") + # conda activate "${CONDA_ENV}" + + # PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "cmake" + + # # install pybind + # bash install_requirements.sh --pybind xnnpack + + # # install phi-3-mini requirements + # bash examples/models/phi-3-mini/install_requirements.sh + + # # run e2e (export, tokenizer and runner) + # PYTHON_EXECUTABLE=python bash .ci/scripts/test_phi_3_mini.sh + + # test-eval_llama-wikitext-linux: + # name: test-eval_llama-wikitext-linux + # uses: pytorch/test-infra/.github/workflows/linux_job.yml@main + # strategy: + # fail-fast: false + # with: + # runner: linux.24xlarge + # docker-image: executorch-ubuntu-22.04-clang12 + # submodules: 'true' + # ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} + # timeout: 90 + # script: | + # # The generic Linux job chooses to use base env, not the one setup by the image + # CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]") + # conda activate "${CONDA_ENV}" + + # PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "cmake" + + # # install pybind + # bash install_requirements.sh --pybind xnnpack + + # # install llama requirements + # bash examples/models/llama/install_requirements.sh + + # # run eval_llama wikitext task + # PYTHON_EXECUTABLE=python bash .ci/scripts/test_eval_llama_wikitext.sh + + # test-eval_llama-mmlu-linux: + # name: test-eval_llama-mmlu-linux + # uses: pytorch/test-infra/.github/workflows/linux_job.yml@main + # strategy: + # fail-fast: false + # with: + # runner: linux.24xlarge + # docker-image: executorch-ubuntu-22.04-clang12 + # submodules: 'true' + # ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} + # timeout: 90 + # script: | + # # The generic Linux job chooses to use base env, not the one setup by the image + # CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]") + # conda activate "${CONDA_ENV}" + + # PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "cmake" + + # # install pybind + # bash install_requirements.sh --pybind xnnpack + + # # install llama requirements + # bash examples/models/llama/install_requirements.sh + + # # run eval_llama mmlu task + # PYTHON_EXECUTABLE=python bash .ci/scripts/test_eval_llama_mmlu.sh + + # test-llama_runner_eager-linux: + # name: test-llama_runner_eager-linux + # uses: pytorch/test-infra/.github/workflows/linux_job.yml@main + # strategy: + # fail-fast: false + # with: + # runner: linux.24xlarge + # docker-image: executorch-ubuntu-22.04-clang12 + # submodules: 'true' + # ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} + # timeout: 90 + # script: | + # # The generic Linux job chooses to use base env, not the one setup by the image + # CONDA_ENV=$(conda env list 
--json | jq -r ".envs | .[-1]") + # conda activate "${CONDA_ENV}" + + # PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "cmake" + + # # install pybind + # bash install_requirements.sh --pybind xnnpack + + # # install llama requirements + # bash examples/models/llama/install_requirements.sh + + # # run llama runner in eager mode + # PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama_runner_eager.sh diff --git a/.github/workflows/trunk.yml b/.github/workflows/trunk.yml index 7afc385a19..5f5d638ba4 100644 --- a/.github/workflows/trunk.yml +++ b/.github/workflows/trunk.yml @@ -36,408 +36,408 @@ jobs: PYTHONPATH="${PWD}" python .ci/scripts/gather_test_models.py --target-os macos --event "${GITHUB_EVENT_NAME}" - test-models-macos: - name: test-models-macos - uses: pytorch/test-infra/.github/workflows/macos_job.yml@main - needs: gather-models - strategy: - matrix: ${{ fromJSON(needs.gather-models.outputs.models) }} - fail-fast: false - with: - runner: ${{ matrix.runner }} - python-version: '3.11' - submodules: 'true' - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - timeout: ${{ matrix.timeout }} - script: | - MODEL_NAME=${{ matrix.model }} - BUILD_TOOL=${{ matrix.build-tool }} - BACKEND=${{ matrix.backend }} - DEMO_BACKEND_DELEGATION=${{ matrix.demo_backend_delegation }} - - bash .ci/scripts/setup-conda.sh - # Setup MacOS dependencies as there is no Docker support on MacOS atm - PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh "${BUILD_TOOL}" - # Build and test executorch - PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/test_model.sh "${MODEL_NAME}" "${BUILD_TOOL}" "${BACKEND}" "${DEMO_BACKEND_DELEGATION}" - - test-custom-ops-macos: - name: test-custom-ops-macos - uses: pytorch/test-infra/.github/workflows/macos_job.yml@main - strategy: - matrix: - include: - - build-tool: cmake - fail-fast: false - with: - runner: macos-m1-stable - python-version: '3.11' - submodules: 'true' - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - script: | - BUILD_TOOL=${{ matrix.build-tool }} - - bash .ci/scripts/setup-conda.sh - # Setup MacOS dependencies as there is no Docker support on MacOS atm - PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh "${BUILD_TOOL}" - # Build and test custom ops - PYTHON_EXECUTABLE=python ${CONDA_RUN} bash examples/portable/custom_ops/test_custom_ops.sh "${BUILD_TOOL}" - - test-selective-build-macos: - name: test-selective-build-macos - uses: pytorch/test-infra/.github/workflows/macos_job.yml@main - strategy: - matrix: - include: - - build-tool: cmake - fail-fast: false - with: - runner: macos-m1-stable - python-version: '3.11' - submodules: 'true' - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - script: | - BUILD_TOOL=${{ matrix.build-tool }} - - bash .ci/scripts/setup-conda.sh - # Setup MacOS dependencies as there is no Docker support on MacOS atm - PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh "${BUILD_TOOL}" - # Build and test selective build - PYTHON_EXECUTABLE=python ${CONDA_RUN} bash examples/selective_build/test_selective_build.sh "${BUILD_TOOL}" - - test-demo-backend-delegation: - name: test-demo-backend-delegation - uses: pytorch/test-infra/.github/workflows/linux_job.yml@main - strategy: - matrix: - include: - - build-tool: buck2 - - build-tool: cmake - fail-fast: false - with: - runner: linux.2xlarge - docker-image: 
executorch-ubuntu-22.04-clang12 - submodules: 'true' - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - script: | - # The generic Linux job chooses to use base env, not the one setup by the image - CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]") - conda activate "${CONDA_ENV}" - - BUILD_TOOL=${{ matrix.build-tool }} - PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "${BUILD_TOOL}" - # Test selective build - PYTHON_EXECUTABLE=python bash examples/portable/scripts/test_demo_backend_delegation.sh "${BUILD_TOOL}" - - test-arm-backend-delegation: - name: test-arm-backend-delegation - uses: pytorch/test-infra/.github/workflows/linux_job.yml@main - with: - runner: linux.2xlarge - docker-image: executorch-ubuntu-22.04-arm-sdk - submodules: 'true' - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - timeout: 90 - script: | - # The generic Linux job chooses to use base env, not the one setup by the image - CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]") - conda activate "${CONDA_ENV}" - - source .ci/scripts/utils.sh - install_executorch - - install_arm - - # Increase number of files user can monitor to bypass buck failures. - # Hopefully this is high enough for this setup. - sudo sysctl fs.inotify.max_user_watches=1048576 # 1024 * 1024 - - # Test ethos-u delegate examples with run.sh - PYTHON_EXECUTABLE=python bash examples/arm/run.sh examples/arm/ethos-u-scratch/ - - test-arm-reference-delegation: - name: test-arm-reference-delegation - uses: pytorch/test-infra/.github/workflows/linux_job.yml@main - with: - runner: linux.2xlarge - docker-image: executorch-ubuntu-22.04-arm-sdk - submodules: 'true' - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - timeout: 90 - script: | - # The generic Linux job chooses to use base env, not the one setup by the image - CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]") - conda activate "${CONDA_ENV}" - - source .ci/scripts/utils.sh - install_executorch - - install_arm - - # Run arm unit tests - pytest -c /dev/null -v -n auto --cov=./ --cov-report=xml backends/arm/test - - test-coreml-delegate: - name: test-coreml-delegate - uses: pytorch/test-infra/.github/workflows/macos_job.yml@main - with: - runner: macos-13-xlarge - python-version: '3.11' - submodules: 'true' - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - timeout: 90 - script: | - BUILD_TOOL=cmake - - bash .ci/scripts/setup-conda.sh - # Setup MacOS dependencies as there is no Docker support on MacOS atm - GITHUB_RUNNER=1 PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh "${BUILD_TOOL}" - # Build and test coreml delegate - PYTHON_EXECUTABLE=python ${CONDA_RUN} bash backends/apple/coreml/scripts/build_all.sh - - test-pybind-build-macos: - name: test-pybind-build-macos - uses: pytorch/test-infra/.github/workflows/macos_job.yml@main - strategy: - matrix: - include: - - build-tool: cmake - fail-fast: false - with: - runner: macos-m1-stable - python-version: '3.11' - submodules: 'true' - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - timeout: 180 - script: | - bash .ci/scripts/setup-conda.sh - - # build module for executorch.extension.pybindings.portable_lib - BUILD_TOOL=${{ matrix.build-tool }} - EXECUTORCH_BUILD_PYBIND=ON PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh 
"${BUILD_TOOL}" - - # see if we can import the module successfully - ${CONDA_RUN} python -c "from executorch.extension.pybindings import portable_lib; print('success!')" - - test-llama-runner-macos: - name: test-llama-runner-mac - uses: pytorch/test-infra/.github/workflows/macos_job.yml@main - strategy: - matrix: - dtype: [fp32] - mode: [portable, xnnpack+kv+custom, mps, coreml] - include: - - dtype: bf16 - mode: portable - - dtype: bf16 - mode: custom - fail-fast: false - with: - runner: macos-m1-stable - python-version: '3.11' - submodules: 'true' - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - timeout: 900 - script: | - - DTYPE=${{ matrix.dtype }} - MODE=${{ matrix.mode }} - - bash .ci/scripts/setup-conda.sh - - # Setup executorch - PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh cmake - - if [[ "${MODE}" == "mps" ]]; then - # Install mps delegate - PYTHON_EXECUTABLE=python ${CONDA_RUN} bash backends/apple/mps/install_requirements.sh - echo "Finishing installing mps." - elif [[ "${MODE}" == "coreml" ]]; then - # Install coreml delegate - PYTHON_EXECUTABLE=python ${CONDA_RUN} bash backends/apple/coreml/scripts/install_requirements.sh - echo "Finishing installing coreml." - fi - - # Install requirements for export_llama - PYTHON_EXECUTABLE=python ${CONDA_RUN} bash examples/models/llama/install_requirements.sh - # Test llama2 - PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/test_llama.sh -model stories110M -build_tool cmake -dtype "${DTYPE}" -mode "${MODE}" - - # # TODO(jackzhxng): Runner consistently runs out of memory before test finishes. Try to find a more powerful runner. - # test-llava-runner-macos: - # name: test-llava-runner-macos + # test-models-macos: + # name: test-models-macos # uses: pytorch/test-infra/.github/workflows/macos_job.yml@main + # needs: gather-models # strategy: + # matrix: ${{ fromJSON(needs.gather-models.outputs.models) }} # fail-fast: false # with: - # runner: macos-14-xlarge + # runner: ${{ matrix.runner }} + # python-version: '3.11' + # submodules: 'true' + # ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} + # timeout: ${{ matrix.timeout }} + # script: | + # MODEL_NAME=${{ matrix.model }} + # BUILD_TOOL=${{ matrix.build-tool }} + # BACKEND=${{ matrix.backend }} + # DEMO_BACKEND_DELEGATION=${{ matrix.demo_backend_delegation }} + + # bash .ci/scripts/setup-conda.sh + # # Setup MacOS dependencies as there is no Docker support on MacOS atm + # PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh "${BUILD_TOOL}" + # # Build and test executorch + # PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/test_model.sh "${MODEL_NAME}" "${BUILD_TOOL}" "${BACKEND}" "${DEMO_BACKEND_DELEGATION}" + + # test-custom-ops-macos: + # name: test-custom-ops-macos + # uses: pytorch/test-infra/.github/workflows/macos_job.yml@main + # strategy: + # matrix: + # include: + # - build-tool: cmake + # fail-fast: false + # with: + # runner: macos-m1-stable + # python-version: '3.11' + # submodules: 'true' + # ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} + # script: | + # BUILD_TOOL=${{ matrix.build-tool }} + + # bash .ci/scripts/setup-conda.sh + # # Setup MacOS dependencies as there is no Docker support on MacOS atm + # PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh "${BUILD_TOOL}" + # # Build and test custom ops + # PYTHON_EXECUTABLE=python ${CONDA_RUN} bash 
examples/portable/custom_ops/test_custom_ops.sh "${BUILD_TOOL}" + + # test-selective-build-macos: + # name: test-selective-build-macos + # uses: pytorch/test-infra/.github/workflows/macos_job.yml@main + # strategy: + # matrix: + # include: + # - build-tool: cmake + # fail-fast: false + # with: + # runner: macos-m1-stable # python-version: '3.11' # submodules: 'true' # ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - # timeout: 900 + # script: | + # BUILD_TOOL=${{ matrix.build-tool }} + + # bash .ci/scripts/setup-conda.sh + # # Setup MacOS dependencies as there is no Docker support on MacOS atm + # PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh "${BUILD_TOOL}" + # # Build and test selective build + # PYTHON_EXECUTABLE=python ${CONDA_RUN} bash examples/selective_build/test_selective_build.sh "${BUILD_TOOL}" + + # test-demo-backend-delegation: + # name: test-demo-backend-delegation + # uses: pytorch/test-infra/.github/workflows/linux_job.yml@main + # strategy: + # matrix: + # include: + # - build-tool: buck2 + # - build-tool: cmake + # fail-fast: false + # with: + # runner: linux.2xlarge + # docker-image: executorch-ubuntu-22.04-clang12 + # submodules: 'true' + # ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} + # script: | + # # The generic Linux job chooses to use base env, not the one setup by the image + # CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]") + # conda activate "${CONDA_ENV}" + + # BUILD_TOOL=${{ matrix.build-tool }} + # PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "${BUILD_TOOL}" + # # Test selective build + # PYTHON_EXECUTABLE=python bash examples/portable/scripts/test_demo_backend_delegation.sh "${BUILD_TOOL}" + + # test-arm-backend-delegation: + # name: test-arm-backend-delegation + # uses: pytorch/test-infra/.github/workflows/linux_job.yml@main + # with: + # runner: linux.2xlarge + # docker-image: executorch-ubuntu-22.04-arm-sdk + # submodules: 'true' + # ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} + # timeout: 90 + # script: | + # # The generic Linux job chooses to use base env, not the one setup by the image + # CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]") + # conda activate "${CONDA_ENV}" + + # source .ci/scripts/utils.sh + # install_executorch + + # install_arm + + # # Increase number of files user can monitor to bypass buck failures. + # # Hopefully this is high enough for this setup. 
+ # sudo sysctl fs.inotify.max_user_watches=1048576 # 1024 * 1024 + + # # Test ethos-u delegate examples with run.sh + # PYTHON_EXECUTABLE=python bash examples/arm/run.sh examples/arm/ethos-u-scratch/ + + # test-arm-reference-delegation: + # name: test-arm-reference-delegation + # uses: pytorch/test-infra/.github/workflows/linux_job.yml@main + # with: + # runner: linux.2xlarge + # docker-image: executorch-ubuntu-22.04-arm-sdk + # submodules: 'true' + # ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} + # timeout: 90 + # script: | + # # The generic Linux job chooses to use base env, not the one setup by the image + # CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]") + # conda activate "${CONDA_ENV}" + + # source .ci/scripts/utils.sh + # install_executorch + + # install_arm + + # # Run arm unit tests + # pytest -c /dev/null -v -n auto --cov=./ --cov-report=xml backends/arm/test + + # test-coreml-delegate: + # name: test-coreml-delegate + # uses: pytorch/test-infra/.github/workflows/macos_job.yml@main + # with: + # runner: macos-13-xlarge + # python-version: '3.11' + # submodules: 'true' + # ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} + # timeout: 90 # script: | # BUILD_TOOL=cmake # bash .ci/scripts/setup-conda.sh # # Setup MacOS dependencies as there is no Docker support on MacOS atm # GITHUB_RUNNER=1 PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh "${BUILD_TOOL}" + # # Build and test coreml delegate + # PYTHON_EXECUTABLE=python ${CONDA_RUN} bash backends/apple/coreml/scripts/build_all.sh + + # test-pybind-build-macos: + # name: test-pybind-build-macos + # uses: pytorch/test-infra/.github/workflows/macos_job.yml@main + # strategy: + # matrix: + # include: + # - build-tool: cmake + # fail-fast: false + # with: + # runner: macos-m1-stable + # python-version: '3.11' + # submodules: 'true' + # ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} + # timeout: 180 + # script: | + # bash .ci/scripts/setup-conda.sh + + # # build module for executorch.extension.pybindings.portable_lib + # BUILD_TOOL=${{ matrix.build-tool }} + # EXECUTORCH_BUILD_PYBIND=ON PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh "${BUILD_TOOL}" + + # # see if we can import the module successfully + # ${CONDA_RUN} python -c "from executorch.extension.pybindings import portable_lib; print('success!')" + + # test-llama-runner-macos: + # name: test-llama-runner-mac + # uses: pytorch/test-infra/.github/workflows/macos_job.yml@main + # strategy: + # matrix: + # dtype: [fp32] + # mode: [portable, xnnpack+kv+custom, mps, coreml] + # include: + # - dtype: bf16 + # mode: portable + # - dtype: bf16 + # mode: custom + # fail-fast: false + # with: + # runner: macos-m1-stable + # python-version: '3.11' + # submodules: 'true' + # ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} + # timeout: 900 + # script: | + + # DTYPE=${{ matrix.dtype }} + # MODE=${{ matrix.mode }} + + # bash .ci/scripts/setup-conda.sh - # # install Llava requirements - # ${CONDA_RUN} bash examples/models/llama/install_requirements.sh - # ${CONDA_RUN} bash examples/models/llava/install_requirements.sh - - # # run python unittest - # ${CONDA_RUN} python -m unittest examples.models.llava.test.test_llava - - # # run e2e (export, tokenizer and runner) - # PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/test_llava.sh 
Release - - test-qnn-model: - name: test-qnn-model - uses: pytorch/test-infra/.github/workflows/linux_job.yml@main - strategy: - matrix: - dtype: [fp32] - model: [dl3, mv3, mv2, ic4, ic3, vit] - fail-fast: false - with: - runner: linux.2xlarge - docker-image: executorch-ubuntu-22.04-qnn-sdk - submodules: 'true' - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - timeout: 900 - script: | - # The generic Linux job chooses to use base env, not the one setup by the image - CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]") - conda activate "${CONDA_ENV}" - PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh cmake - PYTHON_EXECUTABLE=python bash .ci/scripts/setup-qnn-deps.sh - PYTHON_EXECUTABLE=python bash .ci/scripts/build-qnn-sdk.sh - PYTHON_EXECUTABLE=python bash .ci/scripts/test_model.sh ${{ matrix.model }} "cmake" "qnn" - - test-apple-model: - name: test-apple-model - uses: pytorch/test-infra/.github/workflows/macos_job.yml@main - strategy: - fail-fast: false - with: - runner: macos-m1-stable - python-version: '3.11' - submodules: 'true' - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - timeout: 90 - script: | - BUILD_TOOL=cmake - - bash .ci/scripts/setup-conda.sh - - # Setup MacOS dependencies as there is no Docker support on MacOS atm - PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh "${BUILD_TOOL}" - PYTHON_EXECUTABLE=python ${CONDA_RUN} bash backends/apple/coreml/scripts/install_requirements.sh - echo "Finishing installing coreml." - PYTHON_EXECUTABLE=python ${CONDA_RUN} bash backends/apple/mps/install_requirements.sh - echo "Finishing installing mps." - - # Build and test coreml model - MODELS=(mv3 ic4 resnet50 edsr mobilebert w2l) - for MODEL_NAME in "${MODELS[@]}"; do - echo "::group::Exporting coreml model: $MODEL_NAME" - PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/test_model.sh "${MODEL_NAME}" "${BUILD_TOOL}" "coreml" - echo "::endgroup::" - - echo "::group::Exporting mps model: $MODEL_NAME" - PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/test_model.sh "${MODEL_NAME}" "${BUILD_TOOL}" "mps" - echo "::endgroup::" - done - - test-huggingface-transformers: - name: test-huggingface-transformers - uses: pytorch/test-infra/.github/workflows/linux_job.yml@main - secrets: inherit - strategy: - matrix: - hf_model_repo: [google/gemma-2b] - fail-fast: false - with: - secrets-env: EXECUTORCH_HF_TOKEN - runner: linux.12xlarge - docker-image: executorch-ubuntu-22.04-clang12 - submodules: 'true' - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - timeout: 90 - script: | - echo "::group::Set up ExecuTorch" - # The generic Linux job chooses to use base env, not the one setup by the image - CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]") - conda activate "${CONDA_ENV}" - PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh cmake - - echo "Installing libexecutorch.a, libextension_module.so, libportable_ops_lib.a" - rm -rf cmake-out - cmake \ - -DCMAKE_INSTALL_PREFIX=cmake-out \ - -DCMAKE_BUILD_TYPE=Release \ - -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \ - -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \ - -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \ - -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \ - -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \ - -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \ - -DEXECUTORCH_BUILD_XNNPACK=ON \ - -DPYTHON_EXECUTABLE=python \ - -Bcmake-out . 
- cmake --build cmake-out -j9 --target install --config Release - - echo "Build llama runner" - dir="examples/models/llama" - cmake \ - -DCMAKE_INSTALL_PREFIX=cmake-out \ - -DCMAKE_BUILD_TYPE=Release \ - -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \ - -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \ - -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \ - -DEXECUTORCH_BUILD_XNNPACK=ON \ - -DPYTHON_EXECUTABLE=python \ - -Bcmake-out/${dir} \ - ${dir} - cmake --build cmake-out/${dir} -j9 --config Release - echo "::endgroup::" - - echo "::group::Set up HuggingFace Dependencies" - if [ -z "$SECRET_EXECUTORCH_HF_TOKEN" ]; then - echo "::error::SECRET_EXECUTORCH_HF_TOKEN is empty. For security reason secrets won't be accessible on forked PRs. Please make sure you submit a non-forked PR." - exit 1 - fi - pip install -U "huggingface_hub[cli]" - huggingface-cli login --token $SECRET_EXECUTORCH_HF_TOKEN - pip install accelerate sentencepiece - pip list - echo "::endgroup::" - - echo "::group::Export to ExecuTorch" - TOKENIZER_FILE=tokenizer.model - TOKENIZER_BIN_FILE=tokenizer.bin - ET_MODEL_NAME=et_model - # Fetch the file using a Python one-liner - DOWNLOADED_TOKENIZER_FILE_PATH=$(python -c " - from huggingface_hub import hf_hub_download - # Download the file from the Hugging Face Hub - downloaded_path = hf_hub_download( - repo_id='${{ matrix.hf_model_repo }}', - filename='${TOKENIZER_FILE}' - ) - print(downloaded_path) - ") - if [ -f "$DOWNLOADED_TOKENIZER_FILE_PATH" ]; then - echo "${TOKENIZER_FILE} downloaded successfully at: $DOWNLOADED_TOKENIZER_FILE_PATH" - python -m extension.llm.tokenizer.tokenizer -t $DOWNLOADED_TOKENIZER_FILE_PATH -o ./${TOKENIZER_BIN_FILE} - ls ./tokenizer.bin - else - echo "Failed to download ${TOKENIZER_FILE} from ${{ matrix.hf_model_repo }}." - exit 1 - fi - - python -m extension.export_util.export_hf_model -hfm=${{ matrix.hf_model_repo }} -o ${ET_MODEL_NAME} - - cmake-out/examples/models/llama/llama_main --model_path=${ET_MODEL_NAME}.pte --tokenizer_path=${TOKENIZER_BIN_FILE} --prompt="My name is" - echo "::endgroup::" + # # Setup executorch + # PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh cmake + + # if [[ "${MODE}" == "mps" ]]; then + # # Install mps delegate + # PYTHON_EXECUTABLE=python ${CONDA_RUN} bash backends/apple/mps/install_requirements.sh + # echo "Finishing installing mps." + # elif [[ "${MODE}" == "coreml" ]]; then + # # Install coreml delegate + # PYTHON_EXECUTABLE=python ${CONDA_RUN} bash backends/apple/coreml/scripts/install_requirements.sh + # echo "Finishing installing coreml." + # fi + + # # Install requirements for export_llama + # PYTHON_EXECUTABLE=python ${CONDA_RUN} bash examples/models/llama/install_requirements.sh + # # Test llama2 + # PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/test_llama.sh -model stories110M -build_tool cmake -dtype "${DTYPE}" -mode "${MODE}" + + # # # TODO(jackzhxng): Runner consistently runs out of memory before test finishes. Try to find a more powerful runner. 
+ # # test-llava-runner-macos: + # # name: test-llava-runner-macos + # # uses: pytorch/test-infra/.github/workflows/macos_job.yml@main + # # strategy: + # # fail-fast: false + # # with: + # # runner: macos-14-xlarge + # # python-version: '3.11' + # # submodules: 'true' + # # ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} + # # timeout: 900 + # # script: | + # # BUILD_TOOL=cmake + + # # bash .ci/scripts/setup-conda.sh + # # # Setup MacOS dependencies as there is no Docker support on MacOS atm + # # GITHUB_RUNNER=1 PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh "${BUILD_TOOL}" + + # # # install Llava requirements + # # ${CONDA_RUN} bash examples/models/llama/install_requirements.sh + # # ${CONDA_RUN} bash examples/models/llava/install_requirements.sh + + # # # run python unittest + # # ${CONDA_RUN} python -m unittest examples.models.llava.test.test_llava + + # # # run e2e (export, tokenizer and runner) + # # PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/test_llava.sh Release + + # test-qnn-model: + # name: test-qnn-model + # uses: pytorch/test-infra/.github/workflows/linux_job.yml@main + # strategy: + # matrix: + # dtype: [fp32] + # model: [dl3, mv3, mv2, ic4, ic3, vit] + # fail-fast: false + # with: + # runner: linux.2xlarge + # docker-image: executorch-ubuntu-22.04-qnn-sdk + # submodules: 'true' + # ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} + # timeout: 900 + # script: | + # # The generic Linux job chooses to use base env, not the one setup by the image + # CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]") + # conda activate "${CONDA_ENV}" + # PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh cmake + # PYTHON_EXECUTABLE=python bash .ci/scripts/setup-qnn-deps.sh + # PYTHON_EXECUTABLE=python bash .ci/scripts/build-qnn-sdk.sh + # PYTHON_EXECUTABLE=python bash .ci/scripts/test_model.sh ${{ matrix.model }} "cmake" "qnn" + + # test-apple-model: + # name: test-apple-model + # uses: pytorch/test-infra/.github/workflows/macos_job.yml@main + # strategy: + # fail-fast: false + # with: + # runner: macos-m1-stable + # python-version: '3.11' + # submodules: 'true' + # ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} + # timeout: 90 + # script: | + # BUILD_TOOL=cmake + + # bash .ci/scripts/setup-conda.sh + + # # Setup MacOS dependencies as there is no Docker support on MacOS atm + # PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh "${BUILD_TOOL}" + # PYTHON_EXECUTABLE=python ${CONDA_RUN} bash backends/apple/coreml/scripts/install_requirements.sh + # echo "Finishing installing coreml." + # PYTHON_EXECUTABLE=python ${CONDA_RUN} bash backends/apple/mps/install_requirements.sh + # echo "Finishing installing mps." 
+ + # # Build and test coreml model + # MODELS=(mv3 ic4 resnet50 edsr mobilebert w2l) + # for MODEL_NAME in "${MODELS[@]}"; do + # echo "::group::Exporting coreml model: $MODEL_NAME" + # PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/test_model.sh "${MODEL_NAME}" "${BUILD_TOOL}" "coreml" + # echo "::endgroup::" + + # echo "::group::Exporting mps model: $MODEL_NAME" + # PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/test_model.sh "${MODEL_NAME}" "${BUILD_TOOL}" "mps" + # echo "::endgroup::" + # done + + # test-huggingface-transformers: + # name: test-huggingface-transformers + # uses: pytorch/test-infra/.github/workflows/linux_job.yml@main + # secrets: inherit + # strategy: + # matrix: + # hf_model_repo: [google/gemma-2b] + # fail-fast: false + # with: + # secrets-env: EXECUTORCH_HF_TOKEN + # runner: linux.12xlarge + # docker-image: executorch-ubuntu-22.04-clang12 + # submodules: 'true' + # ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} + # timeout: 90 + # script: | + # echo "::group::Set up ExecuTorch" + # # The generic Linux job chooses to use base env, not the one setup by the image + # CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]") + # conda activate "${CONDA_ENV}" + # PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh cmake + + # echo "Installing libexecutorch.a, libextension_module.so, libportable_ops_lib.a" + # rm -rf cmake-out + # cmake \ + # -DCMAKE_INSTALL_PREFIX=cmake-out \ + # -DCMAKE_BUILD_TYPE=Release \ + # -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \ + # -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \ + # -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \ + # -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \ + # -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \ + # -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \ + # -DEXECUTORCH_BUILD_XNNPACK=ON \ + # -DPYTHON_EXECUTABLE=python \ + # -Bcmake-out . + # cmake --build cmake-out -j9 --target install --config Release + + # echo "Build llama runner" + # dir="examples/models/llama" + # cmake \ + # -DCMAKE_INSTALL_PREFIX=cmake-out \ + # -DCMAKE_BUILD_TYPE=Release \ + # -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \ + # -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \ + # -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \ + # -DEXECUTORCH_BUILD_XNNPACK=ON \ + # -DPYTHON_EXECUTABLE=python \ + # -Bcmake-out/${dir} \ + # ${dir} + # cmake --build cmake-out/${dir} -j9 --config Release + # echo "::endgroup::" + + # echo "::group::Set up HuggingFace Dependencies" + # if [ -z "$SECRET_EXECUTORCH_HF_TOKEN" ]; then + # echo "::error::SECRET_EXECUTORCH_HF_TOKEN is empty. For security reason secrets won't be accessible on forked PRs. Please make sure you submit a non-forked PR." 
+ # exit 1 + # fi + # pip install -U "huggingface_hub[cli]" + # huggingface-cli login --token $SECRET_EXECUTORCH_HF_TOKEN + # pip install accelerate sentencepiece + # pip list + # echo "::endgroup::" + + # echo "::group::Export to ExecuTorch" + # TOKENIZER_FILE=tokenizer.model + # TOKENIZER_BIN_FILE=tokenizer.bin + # ET_MODEL_NAME=et_model + # # Fetch the file using a Python one-liner + # DOWNLOADED_TOKENIZER_FILE_PATH=$(python -c " + # from huggingface_hub import hf_hub_download + # # Download the file from the Hugging Face Hub + # downloaded_path = hf_hub_download( + # repo_id='${{ matrix.hf_model_repo }}', + # filename='${TOKENIZER_FILE}' + # ) + # print(downloaded_path) + # ") + # if [ -f "$DOWNLOADED_TOKENIZER_FILE_PATH" ]; then + # echo "${TOKENIZER_FILE} downloaded successfully at: $DOWNLOADED_TOKENIZER_FILE_PATH" + # python -m extension.llm.tokenizer.tokenizer -t $DOWNLOADED_TOKENIZER_FILE_PATH -o ./${TOKENIZER_BIN_FILE} + # ls ./tokenizer.bin + # else + # echo "Failed to download ${TOKENIZER_FILE} from ${{ matrix.hf_model_repo }}." + # exit 1 + # fi + + # python -m extension.export_util.export_hf_model -hfm=${{ matrix.hf_model_repo }} -o ${ET_MODEL_NAME} + + # cmake-out/examples/models/llama/llama_main --model_path=${ET_MODEL_NAME}.pte --tokenizer_path=${TOKENIZER_BIN_FILE} --prompt="My name is" + # echo "::endgroup::"
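
Usage sketch: the new -pt2e_quantize flag flows from the test-llama-runner-qnn-linux
matrix into .ci/scripts/test_llama.sh, which appends the 16a16w calibration arguments
to export_llama only when the value is qnn_16a16w. A minimal local reproduction of
what the CI job runs, assuming a Linux checkout of pytorch/executorch with the QNN
setup scripts working as in the workflow above:

    # Same setup sequence the workflow performs before invoking test_llama.sh
    PYTHON_EXECUTABLE=python bash .ci/scripts/setup-qnn-deps.sh
    PYTHON_EXECUTABLE=python bash .ci/scripts/build-qnn-sdk.sh
    PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh cmake
    PYTHON_EXECUTABLE=python bash examples/models/llama/install_requirements.sh

    # Export and run stories110M with 16-bit activations and 16-bit weights;
    # test_llama.sh adds --pt2e_quantize qnn_16a16w plus the wikitext calibration flags
    PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh \
      -model stories110M \
      -build_tool cmake \
      -dtype fp32 \
      -mode qnn \
      -pt2e_quantize qnn_16a16w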