Skip to content

Commit

Permalink
Enable cuda tests (#183)
Browse files Browse the repository at this point in the history
  • Loading branch information
baijumeswani authored and jchen351 committed Mar 23, 2024
1 parent f44525a commit 2923d33
Show file tree
Hide file tree
Showing 6 changed files with 72 additions and 27 deletions.
2 changes: 2 additions & 0 deletions .github/workflows/linux-cpu-x64-build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,8 @@ jobs:
echo "::add-mask::$HF_TOKEN"
echo "HF_TOKEN=$HF_TOKEN" >> $GITHUB_ENV
# This will also download all the test models to the test/test_models directory
# These models are used by the python tests as well as C#, C++ and others.
- name: Run the python tests
run: |
python3 test/python/test_onnxruntime_genai.py --cwd test/python --test_models test/test_models
Expand Down
8 changes: 4 additions & 4 deletions .github/workflows/win-gpu-x64-build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,10 @@ jobs:
cmake --preset windows_x64_cuda_release -T cuda=${{ env.cuda_dir }}\\v${{ env.cuda_version }} -DTEST_PHI2=False
cmake --build --preset windows_x64_cuda_release --parallel
- name: Add CUDA to PATH
run: |
echo "${{ env.cuda_dir }}\\v${{ env.cuda_version }}\\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
- name: Install the Python Wheel and Test Dependencies
run: |
python -m pip install (Get-ChildItem ("$env:cmake_build_dir\wheel\*.whl"))
Expand All @@ -68,10 +72,6 @@ jobs:
run: |
python test/python/test_onnxruntime_genai.py --cwd "test\python" --test_models "test\test_models"
- name: Add CUDA to PATH
run: |
echo "${{ env.cuda_dir }}\\v${{ env.cuda_version }}\\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
- name: Build the C# API and Run the C# Tests
run: |
cd test\csharp
Expand Down
6 changes: 3 additions & 3 deletions test/csharp/TestOnnxRuntimeGenAIAPI.cs
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ public void TestTopKSearch()
int topK = 100;
float temp = 0.6f;
ulong maxLength = 20;

string modelPath = Path.Combine(Directory.GetCurrentDirectory(), "test_models", "cpu", "phi-2");
using (var model = new Model(modelPath))
{
Expand Down Expand Up @@ -135,7 +135,7 @@ public void TestTopPSearch()
float topP = 0.6f;
float temp = 0.6f;
ulong maxLength = 20;

string modelPath = Path.Combine(Directory.GetCurrentDirectory(), "test_models", "cpu", "phi-2");
using (var model = new Model(modelPath))
{
Expand Down Expand Up @@ -178,7 +178,7 @@ public void TestTopKTopPSearch()
float topP = 0.6f;
float temp = 0.6f;
ulong maxLength = 20;

string modelPath = Path.Combine(Directory.GetCurrentDirectory(), "test_models", "cpu", "phi-2");
using (var model = new Model(modelPath))
{
Expand Down
31 changes: 31 additions & 0 deletions test/python/_test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,3 +50,34 @@ def run_subprocess(
"Subprocess completed. Return code=" + str(completed_process.returncode)
)
return completed_process


def download_models(download_path, device):
    """Download the test models needed for *device* ("cpu" or "cuda").

    Equivalent to running, for each model in the catalog:
        python -m onnxruntime_genai.models.builder -m <identifier> -p int4 \
            -e <device> -o <download_path>/<device>/<name> \
            --extra_options num_hidden_layers=1
    Models already present on disk are not re-downloaded.
    """
    # Per-device catalog mapping a local folder name to its hub identifier.
    catalog = {
        "cpu": {"phi-2": "microsoft/phi-2"},
        "cuda": {"phi-2": "microsoft/phi-2"},
    }
    for name, identifier in catalog[device].items():
        destination = os.path.join(download_path, device, name)
        if os.path.exists(destination):
            # Already materialized; skip the expensive builder invocation.
            continue
        builder_command = [
            sys.executable,
            "-m",
            "onnxruntime_genai.models.builder",
            "-m",
            identifier,
            "-p",
            "int4",
            "-e",
            device,
            "-o",
            destination,
            "--extra_options",
            "num_hidden_layers=1",
        ]
        run_subprocess(builder_command).check_returncode()
28 changes: 20 additions & 8 deletions test/python/test_onnxruntime_genai.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,11 @@
import os
import pathlib
import sys
import sysconfig
from typing import Union

from _test_utils import run_subprocess
import onnxruntime_genai as og
from _test_utils import download_models, run_subprocess

logging.basicConfig(
format="%(asctime)s %(name)s [%(levelname)s] - %(message)s", level=logging.DEBUG
Expand Down Expand Up @@ -42,8 +44,7 @@ def run_onnxruntime_genai_e2e_tests(
):
log.debug("Running: ONNX Runtime GenAI E2E Tests")

log.debug("Running: Phi-2")
command = [sys.executable, "test_onnxruntime_genai_phi2.py"]
command = [sys.executable, "test_onnxruntime_genai_e2e.py"]
run_subprocess(command, cwd=cwd, log=log).check_returncode()


Expand Down Expand Up @@ -73,11 +74,22 @@ def main():

log.info("Running onnxruntime-genai tests pipeline")

run_onnxruntime_genai_api_tests(
os.path.abspath(args.cwd), log, os.path.abspath(args.test_models)
)

if args.e2e:
if not args.e2e:
if not (
sysconfig.get_platform().endswith("arm64") or sys.version_info.minor < 8
):
download_models(os.path.abspath(args.test_models), "cpu")
if og.is_cuda_available():
download_models(
os.path.abspath(args.test_models),
"cuda",
)

run_onnxruntime_genai_api_tests(
os.path.abspath(args.cwd), log, os.path.abspath(args.test_models)
)

else:
run_onnxruntime_genai_e2e_tests(os.path.abspath(args.cwd), log)

return 0
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,18 +6,19 @@
import tempfile

import onnxruntime_genai as og

from _test_utils import run_subprocess


def download_model(download_path: str | bytes | os.PathLike, device: str):
def download_model(
download_path: str | bytes | os.PathLike, device: str, model_identifier: str
):
# python -m onnxruntime_genai.models.builder -m microsoft/phi-2 -p int4 -e cpu -o download_path
command = [
sys.executable,
"-m",
"onnxruntime_genai.models.builder",
"-m",
"microsoft/phi-2",
model_identifier,
"-p",
"int4",
"-e",
Expand All @@ -28,8 +29,8 @@ def download_model(download_path: str | bytes | os.PathLike, device: str):
run_subprocess(command).check_returncode()


def run_model(model_path: str | bytes | os.PathLike, device: og.DeviceType):
model = og.Model(model_path, device)
def run_model(model_path: str | bytes | os.PathLike):
model = og.Model(model_path)

tokenizer = og.Tokenizer(model)
prompts = [
Expand All @@ -41,17 +42,16 @@ def run_model(model_path: str | bytes | os.PathLike, device: og.DeviceType):
sequences = tokenizer.encode_batch(prompts)
params = og.GeneratorParams(model)
params.set_search_options({"max_length": 200})
params.input_ids=sequences
params.input_ids = sequences

output_sequences = model.generate(params)
output = tokenizer.decode_batch(output_sequences)
assert output


if __name__ == "__main__":
with tempfile.TemporaryDirectory() as temp_dir:
device = "cpu" # FIXME: "cuda" if og.is_cuda_available() else "cpu"
download_model(temp_dir, device)
run_model(
temp_dir, og.DeviceType.CPU if device == "cpu" else og.DeviceType.CUDA
)
for model_name in ["microsoft/phi-2"]:
with tempfile.TemporaryDirectory() as temp_dir:
device = "cuda" if og.is_cuda_available() else "cpu"
download_model(temp_dir, device, model_name)
run_model(temp_dir)

0 comments on commit 2923d33

Please sign in to comment.