Run all models in the CI pipeline

baijumeswani committed Mar 11, 2024
1 parent a1edc77 commit ab7a1e8

Showing 8 changed files with 224 additions and 126 deletions.
2 changes: 2 additions & 0 deletions .github/workflows/linux-cpu-x64-build.yml
@@ -49,6 +49,8 @@ jobs:
echo "::add-mask::$HF_TOKEN"
echo "HF_TOKEN=$HF_TOKEN" >> $GITHUB_ENV
# This will also download all the test models to the test/test_models directory
# These models are used by the python tests as well as C#, C++ and others.
- name: Run the python tests
run: |
python3 test/python/test_onnxruntime_genai.py --cwd test/python --test_models test/test_models
34 changes: 24 additions & 10 deletions test/csharp/TestOnnxRuntimeGenAIAPI.cs
@@ -20,6 +20,20 @@ public OnnxRuntimeGenAITests(ITestOutputHelper o)
        this.output = o;
    }

    private class IgnoreOnModelAbsenceFact : FactAttribute
    {
        public IgnoreOnModelAbsenceFact()
        {
            string modelPath = Path.Combine(Directory.GetCurrentDirectory(), "test_models", "cpu", "phi-2");
            if (!Directory.Exists(modelPath))
            {
                // Skip the test when the model is absent; some machines cannot download it at runtime.
                Skip = "Skipping this test since the model does not exist.";
            }
        }
    }

    [Fact(DisplayName = "TestGreedySearch")]
    public void TestGreedySearch()
    {
@@ -72,10 +86,10 @@ public void TestGreedySearch()
        }
    }

    [Fact(DisplayName = "TestTokenizerBatchEncodeDecode")]
    [IgnoreOnModelAbsenceFact(DisplayName = "TestTokenizerBatchEncodeDecode")]
    public void TestTokenizerBatchEncodeDecode()
    {
        string modelPath = Path.Combine(Directory.GetCurrentDirectory(), "test_models", "hf-internal-testing", "tiny-random-gpt2-fp32");
        string modelPath = Path.Combine(Directory.GetCurrentDirectory(), "test_models", "cpu", "phi-2");
        using (var model = new Model(modelPath, DeviceType.CPU))
        {
            Assert.NotNull(model);
@@ -101,10 +115,10 @@ public void TestTokenizerBatchEncodeDecode()
        }
    }

    [Fact(DisplayName = "TestTokenizerBatchEncodeSingleDecode")]
    [IgnoreOnModelAbsenceFact(DisplayName = "TestTokenizerBatchEncodeSingleDecode")]
    public void TestTokenizerBatchEncodeSingleDecode()
    {
        string modelPath = Path.Combine(Directory.GetCurrentDirectory(), "test_models", "hf-internal-testing", "tiny-random-gpt2-fp32");
        string modelPath = Path.Combine(Directory.GetCurrentDirectory(), "test_models", "cpu", "phi-2");
        using (var model = new Model(modelPath, DeviceType.CPU))
        {
            Assert.NotNull(model);
@@ -132,10 +146,10 @@ public void TestTokenizerBatchEncodeSingleDecode()
        }
    }

    [Fact(DisplayName = "TestTokenizerBatchEncodeStreamDecode")]
    [IgnoreOnModelAbsenceFact(DisplayName = "TestTokenizerBatchEncodeStreamDecode")]
    public void TestTokenizerBatchEncodeStreamDecode()
    {
        string modelPath = Path.Combine(Directory.GetCurrentDirectory(), "test_models", "hf-internal-testing", "tiny-random-gpt2-fp32");
        string modelPath = Path.Combine(Directory.GetCurrentDirectory(), "test_models", "cpu", "phi-2");
        using (var model = new Model(modelPath, DeviceType.CPU))
        {
            Assert.NotNull(model);
@@ -168,10 +182,10 @@ public void TestTokenizerBatchEncodeStreamDecode()
        }
    }

    [Fact(DisplayName = "TestTokenizerSingleEncodeDecode")]
    [IgnoreOnModelAbsenceFact(DisplayName = "TestTokenizerSingleEncodeDecode")]
    public void TestTokenizerSingleEncodeDecode()
    {
        string modelPath = Path.Combine(Directory.GetCurrentDirectory(), "test_models", "hf-internal-testing", "tiny-random-gpt2-fp32");
        string modelPath = Path.Combine(Directory.GetCurrentDirectory(), "test_models", "cpu", "phi-2");
        using (var model = new Model(modelPath, DeviceType.CPU))
        {
            Assert.NotNull(model);
@@ -192,10 +206,10 @@ public void TestTokenizerSingleEncodeDecode()
        }
    }

    [Fact(Skip = "Phi-2 is not available in the CI pipeline")]
    [IgnoreOnModelAbsenceFact(DisplayName = "TestPhi2")]
    public void TestPhi2()
    {
        string modelPath = Path.Combine(Directory.GetCurrentDirectory(), "test_models", "phi-2");
        string modelPath = Path.Combine(Directory.GetCurrentDirectory(), "test_models", "cpu", "phi-2");
        using (var model = new Model(modelPath, DeviceType.CPU))
        {
            Assert.NotNull(model);
38 changes: 38 additions & 0 deletions test/python/_test_utils.py
@@ -50,3 +50,41 @@ def run_subprocess(
"Subprocess completed. Return code=" + str(completed_process.returncode)
)
return completed_process


def download_models(download_path, device, num_hidden_layers=None):
    # python -m onnxruntime_genai.models.builder -m <model_name> -p int4 -e cpu -o <download_path> --extra_options num_hidden_layers=1
    model_names = {
        "cpu": {
            "phi-2": "microsoft/phi-2",
            "llama": "meta-llama/Llama-2-7b-chat-hf",
        },
        "cuda": {
            "phi-2": "microsoft/phi-2",
            "gemma": "google/gemma-2b",
            "llama": "meta-llama/Llama-2-7b-chat-hf",
        },
    }
    for model_name, model_identifier in model_names[device].items():
        model_path = os.path.join(download_path, device, model_name)
        if not os.path.exists(model_path):
            command = [
                sys.executable,
                "-m",
                "onnxruntime_genai.models.builder",
                "-m",
                model_identifier,
                "-p",
                "int4",
                "-e",
                device,
                "-o",
                model_path,
            ]
            if num_hidden_layers:
                command.extend(
                    ["--extra_options", f"num_hidden_layers={num_hidden_layers}"]
                )
            run_subprocess(command).check_returncode()
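
For context, a minimal usage sketch of the helper above; the destination path and layer count are illustrative, not part of this commit:

# Hypothetical invocation: build int4, single-layer CPU variants of each model
# under test/test_models/cpu/<model_name> (e.g. test/test_models/cpu/phi-2).
from _test_utils import download_models

download_models("test/test_models", "cpu", num_hidden_layers=1)

Gated repositories such as meta-llama/Llama-2-7b-chat-hf presumably also need the HF_TOKEN exported by the workflow step above before the builder can fetch them.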
71 changes: 38 additions & 33 deletions test/python/conftest.py
@@ -1,11 +1,11 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License

import functools
import os
import sys

import pytest

from _test_utils import run_subprocess


@@ -18,39 +18,44 @@ def pytest_addoption(parser):
    )


def download_phi2(model_path):
    # python -m onnxruntime_genai.models.builder -m microsoft/phi-2 -p int4 -e cpu -o model_path
    device = "cpu"  # FIXME: "cuda" if og.is_cuda_available() else "cpu"
    command = [
        sys.executable,
        "-m",
        "onnxruntime_genai.models.builder",
        "-m",
        "microsoft/phi-2",
        "-p",
        "int4",
        "-e",
        device,
        "-o",
        model_path,
    ]
    run_subprocess(command).check_returncode()
def get_path_for_model_and_device(data_path, model_name, device):
    return os.path.join(data_path, device, model_name)


@pytest.fixture
def phi2_for(request):
    return functools.partial(
        get_path_for_model_and_device,
        request.config.getoption("--test_models"),
        "phi-2",
    )


@pytest.fixture
def gemma_for(request):
    return functools.partial(
        get_path_for_model_and_device,
        request.config.getoption("--test_models"),
        "gemma",
    )


@pytest.fixture
def llama_for(request):
    return functools.partial(
        get_path_for_model_and_device,
        request.config.getoption("--test_models"),
        "llama",
    )


@pytest.fixture
def path_for_model(request):
    return functools.partial(
        get_path_for_model_and_device, request.config.getoption("--test_models")
    )


@pytest.fixture
def test_data_path(request):
    def _get_model_path(model_name=None):
        if not model_name:
            return request.config.getoption("--test_models")

        if model_name == "phi-2":
            model_path = os.path.join(
                request.config.getoption("--test_models"), "phi-2"
            )
            if not os.path.exists(model_path):
                download_phi2(model_path)
            return model_path
        else:
            raise ValueError(f"Unknown model name: {model_name}")

    return _get_model_path
    return request.config.getoption("--test_models")
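
Each *_for fixture returns a functools.partial that still expects the device segment, so a test resolves a concrete model path by calling the fixture with a device name. A hedged sketch of a consuming test follows; the test body and the og.Model constructor shape are illustrative, not from this commit:

import onnxruntime_genai as og

def test_phi2_loads(phi2_for):
    model_path = phi2_for("cpu")  # resolves to <--test_models>/cpu/phi-2
    model = og.Model(model_path)  # signature assumed; some builds also take a device type
    assert model is not None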
35 changes: 30 additions & 5 deletions test/python/test_onnxruntime_genai.py
@@ -6,9 +6,11 @@
import os
import pathlib
import sys
import sysconfig
from typing import Union

from _test_utils import run_subprocess
import onnxruntime_genai as og
from _test_utils import download_models, run_subprocess

logging.basicConfig(
    format="%(asctime)s %(name)s [%(levelname)s] - %(message)s", level=logging.DEBUG
@@ -39,12 +41,19 @@ def run_onnxruntime_genai_api_tests(
def run_onnxruntime_genai_e2e_tests(
    cwd: Union[str, bytes, os.PathLike],
    log: logging.Logger,
    test_models: Union[str, bytes, os.PathLike],
):
    log.debug("Running: ONNX Runtime GenAI E2E Tests")

    log.debug("Running: Phi-2")
    command = [sys.executable, "test_onnxruntime_genai_phi2.py"]
    run_subprocess(command, cwd=cwd, log=log).check_returncode()
    for model in ["phi-2", "gemma", "llama"]:
        log.debug(f"Running: {model}")
        command = [
            sys.executable,
            "test_onnxruntime_genai_e2e.py",
            "--model_path",
            os.path.join(test_models, "cpu", model),
        ]
        run_subprocess(command, cwd=cwd, log=log).check_returncode()


def parse_arguments():
@@ -73,12 +82,28 @@ def main():

    log.info("Running onnxruntime-genai tests pipeline")

    num_hidden_layers = None if args.e2e else 1
    if not (sysconfig.get_platform().endswith("arm64") or sys.version_info.minor < 8):
        download_models(
            os.path.abspath(args.test_models),
            "cpu",
            num_hidden_layers=num_hidden_layers,
        )
        if og.is_cuda_available():
            download_models(
                os.path.abspath(args.test_models),
                "cuda",
                num_hidden_layers=num_hidden_layers,
            )

    run_onnxruntime_genai_api_tests(
        os.path.abspath(args.cwd), log, os.path.abspath(args.test_models)
    )

    if args.e2e:
        run_onnxruntime_genai_e2e_tests(os.path.abspath(args.cwd), log)
        run_onnxruntime_genai_e2e_tests(
            os.path.abspath(args.cwd), log, os.path.abspath(args.test_models)
        )

    return 0
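
For reference, running this pipeline locally the way the CI step above does would presumably look like the following; the --e2e flag spelling is assumed from args.e2e, since parse_arguments is truncated in this diff:

# API tests only (downloads single-layer test models first):
python3 test/python/test_onnxruntime_genai.py --cwd test/python --test_models test/test_models

# API plus end-to-end tests against full-depth models (assumed flag name):
python3 test/python/test_onnxruntime_genai.py --cwd test/python --test_models test/test_models --e2e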
