Skip to content

[tuner] Filter out non finite benchmark times #1988

[tuner] Filter out non finite benchmark times

[tuner] Filter out non finite benchmark times #1988

Workflow file for this run

# Copyright 2024 Advanced Micro Devices, Inc.
#
# Licensed under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
name: CI - sharktank
on:
workflow_dispatch:
pull_request:
push:
branches:
- main
concurrency:
# A PR number if a pull request and otherwise the commit hash. This cancels
# queued and in-progress runs for the same PR (presubmit) or commit
# (postsubmit). The workflow name is prepended to avoid conflicts between
# different workflows.
group: ${{ github.workflow }}-${{ github.event.number || github.sha }}
cancel-in-progress: true
jobs:
test:
name: "Unit Tests (${{ matrix.os }}, ${{ matrix.python-version }}, ${{ matrix.torch-version }})"
strategy:
matrix:
python-version: ["3.11", "3.12"]
torch-version: ["2.3.0", "2.4.1", "2.5.1"]
os: [ubuntu-24.04]
include:
- os: windows-2022
python-version: "3.11"
torch-version: "2.3.0"
- os: windows-2022
python-version: "3.12"
torch-version: "2.4.1"
exclude:
- python-version: "3.12"
# `torch.compile` requires torch>=2.4.0 for Python 3.12+
torch-version: "2.3.0"
fail-fast: false
runs-on: ${{matrix.os}}
defaults:
run:
shell: bash
env:
PIP_CACHE_DIR: "${{ github.workspace }}/.pip-cache"
steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
- name: "Setting up Python"
id: setup_python
uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0
with:
python-version: ${{matrix.python-version}}
- name: Cache Pip Packages
uses: actions/cache@6849a6489940f00c2f30c0fb92c6274307ccb58a # v4.1.2
id: cache-pip
with:
path: ${{ env.PIP_CACHE_DIR }}
key: pip-${{ steps.setup_python.outputs.python-version }}-${{ hashFiles('*requirements*.txt','sharktank/requirements*.txt') }}
- name: Install numpy
if: ${{ matrix.os == 'windows-2022' && matrix.torch-version == '2.3.0' }}
run: pip install "numpy<2.0"
- name: Install pip deps
run: |
python -m pip install --no-compile --upgrade pip
# Note: We install in three steps in order to satisfy requirements
# from non default locations first.
pip install --no-compile \
--index-url https://download.pytorch.org/whl/cpu torch==${{matrix.torch-version}}+cpu
pip install -r requirements-iree-pinned.txt
pip install --no-compile \
-r sharktank/requirements-tests.txt \
-e sharktank/
pip freeze
- name: Run sharktank tests
if: ${{ !cancelled() }}
run: |
pytest -n 4 sharktank/ --durations=10
test_with_data:
name: "Data-dependent Tests"
strategy:
matrix:
python-version: [3.11]
runs-on: [llama-mi300x-3]
fail-fast: false
runs-on: ${{matrix.runs-on}}
defaults:
run:
shell: bash
env:
VENV_DIR: ${{ github.workspace }}/.venv
HF_HOME: "/data/huggingface"
steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
- name: "Setting up Python"
id: setup_python
uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0
with:
python-version: ${{matrix.python-version}}
- name: Create Python venv
run: python -m venv ${VENV_DIR}
- name: Install sharktank deps
run: |
source ${VENV_DIR}/bin/activate
python -m pip install --no-compile --upgrade pip
# Note: We install in three steps in order to satisfy requirements
# from non default locations first.
pip install --no-compile -r pytorch-cpu-requirements.txt
pip install -r requirements-iree-pinned.txt
pip install --no-compile \
-r sharktank/requirements-tests.txt \
-e sharktank/
pip freeze
- name: Run tests
# TODO: unify with-*-data flags into a single flag and make it possible to run
# only tests that require data.
# We would still want the separate flags as we may endup with data being
# scattered on different CI machines.
run: |
source ${VENV_DIR}/bin/activate
pytest \
--with-clip-data \
--with-flux-data \
--with-t5-data \
--with-vae-data \
sharktank/tests/models/clip/clip_test.py \
sharktank/tests/models/t5/t5_test.py \
sharktank/tests/models/flux/flux_test.py \
sharktank/tests/models/vae/vae_test.py \
--durations=0
test_integration:
name: "Model Integration Tests"
runs-on: ubuntu-24.04
env:
PIP_CACHE_DIR: "${{ github.workspace }}/.pip-cache"
steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
- name: "Setting up Python"
id: setup_python
uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0
with:
python-version: 3.11
- name: Cache Pip Packages
uses: actions/cache@6849a6489940f00c2f30c0fb92c6274307ccb58a # v4.1.2
id: cache-pip
with:
path: ${{ env.PIP_CACHE_DIR }}
key: pip-${{ steps.setup_python.outputs.python-version }}-${{ hashFiles('*requirements*.txt','sharktank/requirements*.txt') }}
- name: Install pip deps
run: |
python -m pip install --no-compile --upgrade pip
# Note: We install in three steps in order to satisfy requirements
# from non default locations first.
pip install --no-compile -r pytorch-cpu-requirements.txt
pip install -r requirements-iree-pinned.txt
pip install --no-compile \
-r sharktank/requirements-tests.txt \
-e sharktank/
pip freeze
- name: Run punet tests
run: |
pytest -v sharktank/ -m punet_quick \
--durations=0
# Depends on other jobs to provide an aggregate job status.
# TODO(#584): move test_with_data and test_integration to a pkgci integration test workflow?
ci_sharktank_summary:
if: always()
runs-on: ubuntu-24.04
needs:
- test
steps:
- name: Getting failed jobs
run: |
echo '${{ toJson(needs) }}'
FAILED_JOBS="$(echo '${{ toJson(needs) }}' \
| jq --raw-output \
'map_values(select(.result!="success" and .result!="skipped")) | keys | join(",")' \
)"
if [[ "${FAILED_JOBS}" != "" ]]; then
echo "The following jobs failed: ${FAILED_JOBS}"
exit 1
fi