diff --git a/.github/workflows/linux-cpu-x64-build.yml b/.github/workflows/linux-cpu-x64-build.yml index 1af392e22..fe5c92ad5 100644 --- a/.github/workflows/linux-cpu-x64-build.yml +++ b/.github/workflows/linux-cpu-x64-build.yml @@ -49,6 +49,8 @@ jobs: echo "::add-mask::$HF_TOKEN" echo "HF_TOKEN=$HF_TOKEN" >> $GITHUB_ENV + # This will also download all the test models to the test/test_models directory + # These models are used by the python tests as well as C#, C++ and others. - name: Run the python tests run: | python3 test/python/test_onnxruntime_genai.py --cwd test/python --test_models test/test_models diff --git a/.github/workflows/win-gpu-x64-build.yml b/.github/workflows/win-gpu-x64-build.yml index 60768a3b8..48afb21d4 100644 --- a/.github/workflows/win-gpu-x64-build.yml +++ b/.github/workflows/win-gpu-x64-build.yml @@ -52,6 +52,10 @@ jobs: cmake --preset windows_x64_cuda_release -T cuda=${{ env.cuda_dir }}\\v${{ env.cuda_version }} -DTEST_PHI2=False cmake --build --preset windows_x64_cuda_release --parallel + - name: Add CUDA to PATH + run: | + echo "${{ env.cuda_dir }}\\v${{ env.cuda_version }}\\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append + - name: Install the Python Wheel and Test Dependencies run: | python -m pip install (Get-ChildItem ("$env:cmake_build_dir\wheel\*.whl")) @@ -68,10 +72,6 @@ jobs: run: | python test/python/test_onnxruntime_genai.py --cwd "test\python" --test_models "test\test_models" - - name: Add CUDA to PATH - run: | - echo "${{ env.cuda_dir }}\\v${{ env.cuda_version }}\\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append - - name: Build the C# API and Run the C# Tests run: | cd test\csharp diff --git a/test/csharp/TestOnnxRuntimeGenAIAPI.cs b/test/csharp/TestOnnxRuntimeGenAIAPI.cs index 156f943b4..2113ffdca 100644 --- a/test/csharp/TestOnnxRuntimeGenAIAPI.cs +++ b/test/csharp/TestOnnxRuntimeGenAIAPI.cs @@ -93,7 +93,7 @@ public void TestTopKSearch() int topK = 100; float temp = 0.6f; ulong maxLength = 20; - + string modelPath = Path.Combine(Directory.GetCurrentDirectory(), "test_models", "cpu", "phi-2"); using (var model = new Model(modelPath)) { @@ -135,7 +135,7 @@ public void TestTopPSearch() float topP = 0.6f; float temp = 0.6f; ulong maxLength = 20; - + string modelPath = Path.Combine(Directory.GetCurrentDirectory(), "test_models", "cpu", "phi-2"); using (var model = new Model(modelPath)) { @@ -178,7 +178,7 @@ public void TestTopKTopPSearch() float topP = 0.6f; float temp = 0.6f; ulong maxLength = 20; - + string modelPath = Path.Combine(Directory.GetCurrentDirectory(), "test_models", "cpu", "phi-2"); using (var model = new Model(modelPath)) { diff --git a/test/python/_test_utils.py b/test/python/_test_utils.py index 9dd7f571e..a314454ba 100644 --- a/test/python/_test_utils.py +++ b/test/python/_test_utils.py @@ -50,3 +50,34 @@ def run_subprocess( "Subprocess completed. Return code=" + str(completed_process.returncode) ) return completed_process + + +def download_models(download_path, device): + # python -m onnxruntime_genai.models.builder -m -p int4 -e cpu -o --extra_options num_hidden_layers=1 + model_names = { + "cpu": { + "phi-2": "microsoft/phi-2", + }, + "cuda": { + "phi-2": "microsoft/phi-2", + }, + } + for model_name, model_identifier in model_names[device].items(): + model_path = os.path.join(download_path, device, model_name) + if not os.path.exists(model_path): + command = [ + sys.executable, + "-m", + "onnxruntime_genai.models.builder", + "-m", + model_identifier, + "-p", + "int4", + "-e", + device, + "-o", + model_path, + "--extra_options", + "num_hidden_layers=1", + ] + run_subprocess(command).check_returncode() diff --git a/test/python/test_onnxruntime_genai.py b/test/python/test_onnxruntime_genai.py index 85e7b2713..41d615e51 100644 --- a/test/python/test_onnxruntime_genai.py +++ b/test/python/test_onnxruntime_genai.py @@ -6,9 +6,11 @@ import os import pathlib import sys +import sysconfig from typing import Union -from _test_utils import run_subprocess +import onnxruntime_genai as og +from _test_utils import download_models, run_subprocess logging.basicConfig( format="%(asctime)s %(name)s [%(levelname)s] - %(message)s", level=logging.DEBUG @@ -42,8 +44,7 @@ def run_onnxruntime_genai_e2e_tests( ): log.debug("Running: ONNX Runtime GenAI E2E Tests") - log.debug("Running: Phi-2") - command = [sys.executable, "test_onnxruntime_genai_phi2.py"] + command = [sys.executable, "test_onnxruntime_genai_e2e.py"] run_subprocess(command, cwd=cwd, log=log).check_returncode() @@ -73,11 +74,22 @@ def main(): log.info("Running onnxruntime-genai tests pipeline") - run_onnxruntime_genai_api_tests( - os.path.abspath(args.cwd), log, os.path.abspath(args.test_models) - ) - - if args.e2e: + if not args.e2e: + if not ( + sysconfig.get_platform().endswith("arm64") or sys.version_info.minor < 8 + ): + download_models(os.path.abspath(args.test_models), "cpu") + if og.is_cuda_available(): + download_models( + os.path.abspath(args.test_models), + "cuda", + ) + + run_onnxruntime_genai_api_tests( + os.path.abspath(args.cwd), log, os.path.abspath(args.test_models) + ) + + else: run_onnxruntime_genai_e2e_tests(os.path.abspath(args.cwd), log) return 0 diff --git a/test/python/test_onnxruntime_genai_phi2.py b/test/python/test_onnxruntime_genai_e2e.py similarity index 65% rename from test/python/test_onnxruntime_genai_phi2.py rename to test/python/test_onnxruntime_genai_e2e.py index e2a996a37..cc6f9dde2 100644 --- a/test/python/test_onnxruntime_genai_phi2.py +++ b/test/python/test_onnxruntime_genai_e2e.py @@ -6,18 +6,19 @@ import tempfile import onnxruntime_genai as og - from _test_utils import run_subprocess -def download_model(download_path: str | bytes | os.PathLike, device: str): +def download_model( + download_path: str | bytes | os.PathLike, device: str, model_identifier: str +): # python -m onnxruntime_genai.models.builder -m microsoft/phi-2 -p int4 -e cpu -o download_path command = [ sys.executable, "-m", "onnxruntime_genai.models.builder", "-m", - "microsoft/phi-2", + model_identifier, "-p", "int4", "-e", @@ -28,8 +29,8 @@ def download_model(download_path: str | bytes | os.PathLike, device: str): run_subprocess(command).check_returncode() -def run_model(model_path: str | bytes | os.PathLike, device: og.DeviceType): - model = og.Model(model_path, device) +def run_model(model_path: str | bytes | os.PathLike): + model = og.Model(model_path) tokenizer = og.Tokenizer(model) prompts = [ @@ -41,7 +42,7 @@ def run_model(model_path: str | bytes | os.PathLike, device: og.DeviceType): sequences = tokenizer.encode_batch(prompts) params = og.GeneratorParams(model) params.set_search_options({"max_length": 200}) - params.input_ids=sequences + params.input_ids = sequences output_sequences = model.generate(params) output = tokenizer.decode_batch(output_sequences) @@ -49,9 +50,8 @@ def run_model(model_path: str | bytes | os.PathLike, device: og.DeviceType): if __name__ == "__main__": - with tempfile.TemporaryDirectory() as temp_dir: - device = "cpu" # FIXME: "cuda" if og.is_cuda_available() else "cpu" - download_model(temp_dir, device) - run_model( - temp_dir, og.DeviceType.CPU if device == "cpu" else og.DeviceType.CUDA - ) + for model_name in ["microsoft/phi-2"]: + with tempfile.TemporaryDirectory() as temp_dir: + device = "cuda" if og.is_cuda_available() else "cpu" + download_model(temp_dir, device, model_name) + run_model(temp_dir)