Merge branch 'eval-ci-timeout' of https://github.com/nod-ai/SHARK-Platform into eval-ci-timeout
archana-ramalingam committed Oct 23, 2024
2 parents 6b5274a + f92260b commit 41afc31
Showing 7 changed files with 1,161 additions and 0 deletions.
78 changes: 78 additions & 0 deletions .github/workflows/ci-llama.yaml
@@ -0,0 +1,78 @@
name: Llama Benchmarking Tests

on:
  workflow_dispatch:
  schedule:
    # Weekdays at 9:00 AM UTC = 2:00 AM PST.
    - cron: "0 9 * * 1-5"

concurrency:
  # A PR number if a pull request and otherwise the commit hash. This cancels
  # queued and in-progress runs for the same PR (presubmit) or commit
  # (postsubmit). The workflow name is prepended to avoid conflicts between
  # different workflows.
  group: ${{ github.workflow }}-${{ github.event.number || github.sha }}
  cancel-in-progress: true

jobs:
  test_llama:
    name: "Llama Benchmarking Tests"
    strategy:
      matrix:
        version: [3.11]
      fail-fast: false
    runs-on: llama-mi300
    defaults:
      run:
        shell: bash
    env:
      PIP_CACHE_DIR: "${{ github.workspace }}/.pip-cache"
      VENV_DIR: ${{ github.workspace }}/.venv
    steps:
      - name: Get Current Date
        id: date
        run: echo "::set-output name=date::$(date +'%Y-%m-%d')"

      - name: "Setting up Python"
        id: setup_python
        uses: actions/setup-python@v3
        with:
          python-version: ${{matrix.version}}

      - name: "Checkout Code"
        uses: actions/checkout@v3

      - name: Cache Pip Packages
        uses: actions/cache@v4
        id: cache-pip
        with:
          path: ${{ env.PIP_CACHE_DIR }}
          key: pip-${{ steps.setup_python.outputs.python-version }}-${{ hashFiles('*requirements.txt') }}

      - name: Install pip deps
        run: |
          python -m pip install --no-compile --upgrade pip
          # Note: We install in three steps in order to satisfy requirements
          # from non default locations first. Installing the PyTorch CPU
          # wheels saves multiple minutes and a lot of bandwidth on runner setup.
          pip install --no-compile -r pytorch-cpu-requirements.txt
          pip install --no-compile -f https://iree.dev/pip-release-links.html --src deps \
            -e "git+https://github.com/iree-org/iree-turbine.git#egg=iree-turbine"
          pip install --no-compile -r requirements.txt -r sharktank/requirements-tests.txt -e sharktank/
          # Try with the latest nightly releases, not what iree-turbine pins.
          # We could also pin to a known working or stable version.
          # This should eventually stabilize. Do the best we can for now.
          pip install -f https://iree.dev/pip-release-links.html --upgrade \
            iree-compiler \
            iree-runtime \
            "numpy<2.0"
      - name: Run llama test
        run: pytest sharktank/tests/models/llama/benchmark_amdgpu_tests.py -v -s --longrun

      - name: Upload llama executable files
        uses: actions/upload-artifact@v4
        with:
          name: llama-files
          path: ${{ github.workspace }}/${{ steps.date.outputs.date }}
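
Note: the benchmark step above passes a custom --longrun flag to pytest. Custom options must be registered via pytest_addoption, presumably in the same conftest.py touched below (that part is not shown in this diff). As a rough, hypothetical sketch of how such a gate is commonly wired — names and behavior here are illustrative assumptions, not this repository's code:

    # Hypothetical conftest.py sketch: gate slow benchmark tests behind --longrun.
    # Tests opt in with @pytest.mark.longrun.
    import pytest


    def pytest_addoption(parser):
        parser.addoption(
            "--longrun",
            action="store_true",
            default=False,
            help="Also run tests marked as long-running benchmarks.",
        )


    def pytest_collection_modifyitems(config, items):
        if config.getoption("--longrun"):
            return  # Flag given: keep long-running benchmarks in the run.
        skip_long = pytest.mark.skip(reason="needs --longrun option to run")
        for item in items:
            if "longrun" in item.keywords:
                item.add_marker(skip_long)

Under that pattern, the workflow's `pytest ... -v -s --longrun` opts the benchmarks in, while a plain `pytest` invocation skips them.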
14 changes: 14 additions & 0 deletions sharktank/conftest.py
@@ -128,6 +128,13 @@ def pytest_addoption(parser):
        help="Llama3.1 8B & 405B model baseline perplexity scores json",
    )

    parser.addoption(
        "--iree-hip-target",
        action="store",
        default="gfx942",
        help="Specify the iree-hip target version (e.g., gfx942)",
    )


def set_fixture_from_cli_option(
    request: FixtureRequest,
@@ -168,6 +175,13 @@ def caching(request: FixtureRequest) -> Optional[bool]:
    return set_fixture_from_cli_option(request, "caching")


@pytest.fixture(scope="class")
def iree_hip_target_type(request: FixtureRequest) -> Optional[str]:
    return set_fixture_from_cli_option(
        request, "iree_hip_target", "iree_hip_target_type"
    )


@pytest.fixture(scope="class")
def get_model_path(request: FixtureRequest):
    model_path = {}
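
This hunk adds an --iree-hip-target pytest option (default gfx942) and a class-scoped iree_hip_target_type fixture exposing its value to tests. A minimal usage sketch, assuming the fixture simply returns the CLI value via set_fixture_from_cli_option; the test name and assertions are illustrative, not part of this commit:

    # Hypothetical test consuming the new fixture.
    def test_build_hip_compile_flag(iree_hip_target_type):
        # Defaults to "gfx942"; override with `pytest ... --iree-hip-target=gfx90a`.
        flag = f"--iree-hip-target={iree_hip_target_type}"
        assert flag.endswith(iree_hip_target_type)

Because the fixture is class-scoped, the option is resolved once per test class rather than once per test function.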
15 changes: 15 additions & 0 deletions sharktank/sharktank/examples/export_paged_llm_v1.py
@@ -16,6 +16,7 @@

# TODO: Should be using a base class with the protocol supported.
from ..models.llama.llama import LlamaModelConfig, PagedLlamaModelV1
from ..models.llama.sharding import shard_theta
from ..models.mixtral.mixtral import *
from ..models.grok.grok import *

@@ -51,6 +52,18 @@ def main():
        help="Enables strictness during export",
        action="store_true",
    )
    parser.add_argument(
        "--attention-kernel",
        type=str,
        default="decomposed",
        choices=["decomposed", "torch_sdpa"],
    )
    parser.add_argument(
        "--tensor-parallelism-size",
        type=int,
        default=1,
        help="How many devices are involved for tensor parallel sharding.",
    )

    args = cli.parse(parser)
    dataset_type = cli.get_input_data_files(args)
@@ -59,6 +72,8 @@

    hp = configs.LlamaHParams.from_gguf_props(dataset.properties)
    llama_config = LlamaModelConfig(hp)
    if args.tensor_parallelism_size > 1:
        dataset.root_theta = shard_theta(dataset.root_theta, llama_config)
    llama_config.use_hf = False
    llama_config.static_tables = False  # Rely on the compiler for hoisting tables.
    llama_config.kv_cache_type = "direct" if args.bs == [1] else "paged"
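
The export entry point gains two options: --attention-kernel (restricted to decomposed or torch_sdpa) and --tensor-parallelism-size, which gates a shard_theta call on the dataset's root theta when set above 1. A self-contained sketch of just that argparse surface (the standalone parser below is illustrative; the real script builds its parser through the project's cli helpers):

    # Standalone argparse sketch mirroring the two new options (illustrative only).
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--attention-kernel",
        type=str,
        default="decomposed",
        choices=["decomposed", "torch_sdpa"],
    )
    parser.add_argument(
        "--tensor-parallelism-size",
        type=int,
        default=1,
        help="How many devices are involved for tensor parallel sharding.",
    )

    args = parser.parse_args(["--tensor-parallelism-size", "8"])
    assert args.attention_kernel == "decomposed"  # falls back to the default
    assert args.tensor_parallelism_size > 1       # this condition gates shard_theta()

In the hunk above, crossing that threshold replaces dataset.root_theta with the sharded theta before the rest of the Llama config is populated.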
(Diffs for the remaining 4 of the 7 changed files are not rendered in this view.)
