diff --git a/.github/workflows/linux-cpu-x64-build.yml b/.github/workflows/linux-cpu-x64-build.yml
index 1af392e22..fe5c92ad5 100644
--- a/.github/workflows/linux-cpu-x64-build.yml
+++ b/.github/workflows/linux-cpu-x64-build.yml
@@ -49,6 +49,8 @@ jobs:
           echo "::add-mask::$HF_TOKEN"
           echo "HF_TOKEN=$HF_TOKEN" >> $GITHUB_ENV
 
+      # This step also downloads all the test models into the test/test_models directory.
+      # These models are used by the Python tests as well as C#, C++ and others.
       - name: Run the python tests
         run: |
           python3 test/python/test_onnxruntime_genai.py --cwd test/python --test_models test/test_models
diff --git a/.github/workflows/win-gpu-x64-build.yml b/.github/workflows/win-gpu-x64-build.yml
index 60768a3b8..48afb21d4 100644
--- a/.github/workflows/win-gpu-x64-build.yml
+++ b/.github/workflows/win-gpu-x64-build.yml
@@ -52,6 +52,10 @@ jobs:
         cmake --preset windows_x64_cuda_release -T cuda=${{ env.cuda_dir }}\\v${{ env.cuda_version }} -DTEST_PHI2=False
         cmake --build --preset windows_x64_cuda_release --parallel
 
+    - name: Add CUDA to PATH
+      run: |
+        echo "${{ env.cuda_dir }}\\v${{ env.cuda_version }}\\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
+
     - name: Install the Python Wheel and Test Dependencies
       run: |
         python -m pip install (Get-ChildItem ("$env:cmake_build_dir\wheel\*.whl"))
@@ -68,10 +72,6 @@ jobs:
       run: |
         python test/python/test_onnxruntime_genai.py --cwd "test\python" --test_models "test\test_models"
 
-    - name: Add CUDA to PATH
-      run: |
-        echo "${{ env.cuda_dir }}\\v${{ env.cuda_version }}\\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
-
     - name: Build the C# API and Run the C# Tests
       run: |
         cd test\csharp
diff --git a/test/csharp/TestOnnxRuntimeGenAIAPI.cs b/test/csharp/TestOnnxRuntimeGenAIAPI.cs
index 156f943b4..2113ffdca 100644
--- a/test/csharp/TestOnnxRuntimeGenAIAPI.cs
+++ b/test/csharp/TestOnnxRuntimeGenAIAPI.cs
@@ -93,7 +93,7 @@ public void TestTopKSearch()
             int topK = 100;
             float temp = 0.6f;
             ulong maxLength = 20;
-            
+
             string modelPath = Path.Combine(Directory.GetCurrentDirectory(), "test_models", "cpu", "phi-2");
             using (var model = new Model(modelPath))
             {
@@ -135,7 +135,7 @@ public void TestTopPSearch()
             float topP = 0.6f;
             float temp = 0.6f;
             ulong maxLength = 20;
-            
+
             string modelPath = Path.Combine(Directory.GetCurrentDirectory(), "test_models", "cpu", "phi-2");
             using (var model = new Model(modelPath))
             {
@@ -178,7 +178,7 @@ public void TestTopKTopPSearch()
             float topP = 0.6f;
             float temp = 0.6f;
             ulong maxLength = 20;
-            
+
             string modelPath = Path.Combine(Directory.GetCurrentDirectory(), "test_models", "cpu", "phi-2");
             using (var model = new Model(modelPath))
             {
diff --git a/test/python/_test_utils.py b/test/python/_test_utils.py
index 9dd7f571e..a314454ba 100644
--- a/test/python/_test_utils.py
+++ b/test/python/_test_utils.py
@@ -50,3 +50,34 @@ def run_subprocess(
             "Subprocess completed. Return code=" + str(completed_process.returncode)
         )
     return completed_process
+
+
+def download_models(download_path, device):
+    # python -m onnxruntime_genai.models.builder -m <model_identifier> -p int4 -e <device> -o <download_path>/<device>/<model_name> --extra_options num_hidden_layers=1
+    model_names = {
+        "cpu": {
+            "phi-2": "microsoft/phi-2",
+        },
+        "cuda": {
+            "phi-2": "microsoft/phi-2",
+        },
+    }
+    for model_name, model_identifier in model_names[device].items():
+        model_path = os.path.join(download_path, device, model_name)
+        if not os.path.exists(model_path):
+            command = [
+                sys.executable,
+                "-m",
+                "onnxruntime_genai.models.builder",
+                "-m",
+                model_identifier,
+                "-p",
+                "int4",
+                "-e",
+                device,
+                "-o",
+                model_path,
+                "--extra_options",
+                "num_hidden_layers=1",
+            ]
+            run_subprocess(command).check_returncode()
diff --git a/test/python/test_onnxruntime_genai.py b/test/python/test_onnxruntime_genai.py
index 85e7b2713..41d615e51 100644
--- a/test/python/test_onnxruntime_genai.py
+++ b/test/python/test_onnxruntime_genai.py
@@ -6,9 +6,11 @@
 import os
 import pathlib
 import sys
+import sysconfig
 from typing import Union
 
-from _test_utils import run_subprocess
+import onnxruntime_genai as og
+from _test_utils import download_models, run_subprocess
 
 logging.basicConfig(
     format="%(asctime)s %(name)s [%(levelname)s] - %(message)s", level=logging.DEBUG
@@ -42,8 +44,7 @@ def run_onnxruntime_genai_e2e_tests(
 ):
     log.debug("Running: ONNX Runtime GenAI E2E Tests")
 
-    log.debug("Running: Phi-2")
-    command = [sys.executable, "test_onnxruntime_genai_phi2.py"]
+    command = [sys.executable, "test_onnxruntime_genai_e2e.py"]
     run_subprocess(command, cwd=cwd, log=log).check_returncode()
 
 
@@ -73,11 +74,22 @@ def main():
 
     log.info("Running onnxruntime-genai tests pipeline")
 
-    run_onnxruntime_genai_api_tests(
-        os.path.abspath(args.cwd), log, os.path.abspath(args.test_models)
-    )
-
-    if args.e2e:
+    if not args.e2e:
+        if not (
+            sysconfig.get_platform().endswith("arm64") or sys.version_info.minor < 8
+        ):
+            download_models(os.path.abspath(args.test_models), "cpu")
+            if og.is_cuda_available():
+                download_models(
+                    os.path.abspath(args.test_models),
+                    "cuda",
+                )
+
+        run_onnxruntime_genai_api_tests(
+            os.path.abspath(args.cwd), log, os.path.abspath(args.test_models)
+        )
+
+    else:
         run_onnxruntime_genai_e2e_tests(os.path.abspath(args.cwd), log)
 
     return 0
diff --git a/test/python/test_onnxruntime_genai_phi2.py b/test/python/test_onnxruntime_genai_e2e.py
similarity index 65%
rename from test/python/test_onnxruntime_genai_phi2.py
rename to test/python/test_onnxruntime_genai_e2e.py
index e2a996a37..cc6f9dde2 100644
--- a/test/python/test_onnxruntime_genai_phi2.py
+++ b/test/python/test_onnxruntime_genai_e2e.py
@@ -6,18 +6,19 @@
 import tempfile
 
 import onnxruntime_genai as og
-
 from _test_utils import run_subprocess
 
 
-def download_model(download_path: str | bytes | os.PathLike, device: str):
+def download_model(
+    download_path: str | bytes | os.PathLike, device: str, model_identifier: str
+):
     # python -m onnxruntime_genai.models.builder -m microsoft/phi-2 -p int4 -e cpu -o download_path
     command = [
         sys.executable,
         "-m",
         "onnxruntime_genai.models.builder",
         "-m",
-        "microsoft/phi-2",
+        model_identifier,
         "-p",
         "int4",
         "-e",
@@ -28,8 +29,8 @@ def download_model(download_path: str | bytes | os.PathLike, device: str):
     run_subprocess(command).check_returncode()
 
 
-def run_model(model_path: str | bytes | os.PathLike, device: og.DeviceType):
-    model = og.Model(model_path, device)
+def run_model(model_path: str | bytes | os.PathLike):
+    model = og.Model(model_path)
 
     tokenizer = og.Tokenizer(model)
     prompts = [
@@ -41,7 +42,7 @@ def run_model(model_path: str | bytes | os.PathLike, device: og.DeviceType):
     sequences = tokenizer.encode_batch(prompts)
     params = og.GeneratorParams(model)
     params.set_search_options({"max_length": 200})
-    params.input_ids=sequences
+    params.input_ids = sequences
 
     output_sequences = model.generate(params)
     output = tokenizer.decode_batch(output_sequences)
@@ -49,9 +50,8 @@ def run_model(model_path: str | bytes | os.PathLike, device: og.DeviceType):
 
 
 if __name__ == "__main__":
-    with tempfile.TemporaryDirectory() as temp_dir:
-        device = "cpu"  # FIXME: "cuda" if og.is_cuda_available() else "cpu"
-        download_model(temp_dir, device)
-        run_model(
-            temp_dir, og.DeviceType.CPU if device == "cpu" else og.DeviceType.CUDA
-        )
+    for model_name in ["microsoft/phi-2"]:
+        with tempfile.TemporaryDirectory() as temp_dir:
+            device = "cuda" if og.is_cuda_available() else "cpu"
+            download_model(temp_dir, device, model_name)
+            run_model(temp_dir)