Merge branch 'main' into Cjian/py
# Conflicts:
#	.pipelines/pypl-publishing.yml
jchen351 committed Feb 27, 2024
2 parents 2e28669 + 0a76f21 commit 7442d06
Showing 71 changed files with 1,384 additions and 195,450 deletions.
9 changes: 7 additions & 2 deletions .github/workflows/win-cpu-arm64-build.yml
@@ -45,15 +45,20 @@ jobs:
cmake -G "Visual Studio 17 2022" -A arm64 . -DCMAKE_BUILD_TYPE=Release -DUSE_CUDA=OFF
cmake --build . --config Release --parallel
- name: Install the python wheel and test dependencies
- name: Install the Python Wheel and Test Dependencies
run: |
python -m pip install (Get-ChildItem ("wheel\*.whl"))
python -m pip install -r test\python\requirements.txt
- name: Run the python tests
- name: Run the Python Tests
run: |
python test/python/test_onnxruntime_genai.py --cwd "test\python" --test_models "test\test_models"
- name: Build the C# API and Run the C# Tests
run: |
cd test\csharp
dotnet test /p:NativeBuildOutputDir="..\..\Release\"
- name: Verify Build Artifacts
if: always()
run: |
7 changes: 6 additions & 1 deletion .github/workflows/win-cpu-x64-build.yml
@@ -62,10 +62,15 @@ jobs:
python -m pip install (Get-ChildItem ("wheel\*.whl"))
python -m pip install -r test\python\requirements.txt
- name: Run the python tests
- name: Run the Python Tests
run: |
python test/python/test_onnxruntime_genai.py --cwd "test\python" --test_models "test\test_models"
- name: Build the C# API and Run the C# Tests
run: |
cd test\csharp
dotnet test /p:NativeBuildOutputDir="..\..\Release\"
- name: Verify Build Artifacts
if: always()
run: |
13 changes: 11 additions & 2 deletions .github/workflows/win-gpu-x64-build.yml
@@ -54,15 +54,24 @@ jobs:
cmake -G "Visual Studio 17 2022" -A x64 -T cuda=${{ env.cuda_dir }}\\v${{ env.cuda_version }} .. -DCMAKE_BUILD_TYPE=Release -DUSE_CUDA=TRUE
cmake --build . --config Release --parallel
- name: Install the python wheel and test dependencies
- name: Install the Python Wheel and Test Dependencies
run: |
python -m pip install (Get-ChildItem ("build\wheel\*.whl"))
python -m pip install -r test\python\requirements.txt
- name: Run the python tests
- name: Run the Python Tests
run: |
python test/python/test_onnxruntime_genai.py --cwd "test\python" --test_models "test\test_models"
- name: Add CUDA to PATH
run: |
echo "${{ env.cuda_dir }}\\v${{ env.cuda_version }}\\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
- name: Build the C# API and run the C# tests
run: |
cd test\csharp
dotnet test /p:Configuration=Release /p:NativeBuildOutputDir="..\..\build\Release\"
- name: Verify Build Artifacts
if: always()
run: |
1 change: 1 addition & 0 deletions .gitignore
@@ -15,6 +15,7 @@ test/csharp/obj/
/*.csv
.idea
cache_dir
example-models
*.onnx
*.onnx.data

17 changes: 9 additions & 8 deletions CMakeLists.txt
@@ -72,11 +72,6 @@ if(USE_CUDA AND CMAKE_CUDA_COMPILER)
list(APPEND generator_srcs ${generator_cuda_srcs})
add_compile_definitions(USE_CUDA=1)
include_directories("${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}")
if(ENABLE_TESTS AND TEST_PHI2)
add_compile_definitions(TEST_PHI2=1)
else()
add_compile_definitions(TEST_PHI2=0)
endif()
elseif(USE_CUDA)
# USE_CUDA is true but cmake could not find the cuda compiler
message(FATAL_ERROR "USE_CUDA is ON but no cuda compiler was found.")
@@ -85,6 +80,12 @@ else()
list(REMOVE_ITEM generator_srcs ${generator_cuda_srcs})
endif()

if(ENABLE_TESTS AND TEST_PHI2)
add_compile_definitions(TEST_PHI2=1)
else()
add_compile_definitions(TEST_PHI2=0)
endif()

add_library(onnxruntime-genai SHARED ${generator_srcs})
add_library(onnxruntime-genai-static STATIC ${generator_srcs})
target_include_directories(onnxruntime-genai PRIVATE ${ORT_HEADER_DIR})
@@ -141,7 +142,7 @@ if(NOT EXISTS "${ORT_LIB_DIR}/${ONNXRUNTIME_LIB}")
message(FATAL_ERROR "Expected the ONNX Runtime library to be found at ${ORT_LIB_DIR}/${ONNXRUNTIME_LIB}. Actual: Not found.")
endif()
if(NOT EXISTS "${ORT_HEADER_DIR}/onnxruntime_c_api.h")
message(FATAL_ERROR "Expected the ONNX Runtime C API header to be found at "${ORT_HEADER_DIR}/onnxruntime_c_api.h". Actual: Not found.")
message(FATAL_ERROR "Expected the ONNX Runtime C API header to be found at \"${ORT_HEADER_DIR}/onnxruntime_c_api.h\". Actual: Not found.")
endif()
if(USE_CUDA AND NOT EXISTS "${ORT_LIB_DIR}/${ONNXRUNTIME_PROVIDERS_CUDA_LIB}")
message(FATAL_ERROR "Expected the ONNX Runtime providers cuda library to be found at ${ORT_LIB_DIR}/${ONNXRUNTIME_PROVIDERS_CUDA_LIB}. Actual: Not found.")
@@ -153,11 +154,11 @@ target_link_libraries(onnxruntime-genai PRIVATE ${ONNXRUNTIME_LIB})

if(USE_CUDA AND CMAKE_CUDA_COMPILER)
set_target_properties(onnxruntime-genai PROPERTIES LINKER_LANGUAGE CUDA)
target_link_libraries(onnxruntime-genai PRIVATE cublasLt cublas cudnn curand cufft cudart)
target_link_libraries(onnxruntime-genai PRIVATE cublasLt cublas curand cufft cudart)
# onnxruntime-genai-static is statically linked under Windows
if(NOT WIN32)
set_target_properties(onnxruntime-genai-static PROPERTIES LINKER_LANGUAGE CUDA)
target_link_libraries(onnxruntime-genai-static PRIVATE cublasLt cublas cudnn curand cufft cudart)
target_link_libraries(onnxruntime-genai-static PRIVATE cublasLt cublas curand cufft cudart)
endif()
endif()

135 changes: 91 additions & 44 deletions README.md
@@ -15,6 +15,7 @@ Users can call a high level `generate()` method, or run each iteration of the model
* Supported model architectures:
* Phi-2
* Llama
* Gemma
* GPT
* Supported targets:
* CPU
@@ -38,6 +39,40 @@ Users can call a high level `generate()` method, or run each iteration of the model
* Automatic model download and cache
* More model architectures

## Sample code for phi-2 in Python

Install onnxruntime-genai.

(Temporary) Build and install from source according to the instructions below.


```python
import onnxruntime_genai as og

# Note: device_type is not defined in the original snippet; og.DeviceType.CPU
# is an assumed value here (og.DeviceType.CUDA would select a GPU build).
device_type = og.DeviceType.CPU

model = og.Model('models/microsoft/phi-2', device_type)

tokenizer = model.create_tokenizer()

prompt = '''def print_prime(n):
    """
    Print all primes between 1 and n
    """'''

tokens = tokenizer.encode(prompt)

params = og.SearchParams(model)
params.max_length = 200
params.input_ids = tokens

output_tokens = model.generate(params)

text = tokenizer.decode(output_tokens)

print("Output:")
print(text)
```


## Build from source

This step requires `cmake` to be installed.
@@ -46,26 +81,64 @@

```bash
git clone https://github.com/microsoft/onnxruntime-genai
cd onnxruntime-genai
```

2. Install ONNX Runtime

These instructions are for the Linux GPU build of ONNX Runtime. Replace the archive with the build for your operating system and target of choice.
By default, the onnxruntime-genai build expects to find the ONNX Runtime headers and binaries in a folder called `ort` in the root directory of onnxruntime-genai. You can put the ONNX Runtime files in a different location and point the onnxruntime-genai build at it; these instructions refer to that location as `ORT_HOME`.
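
For example, a minimal sketch (assuming you are in the onnxruntime-genai root; `ORT_HOME` is just a shell variable naming whatever folder you pick):

```bash
# Default location: the `ort` folder under the onnxruntime-genai root.
# Any other path works, as long as the later commands use the same ORT_HOME.
export ORT_HOME=$(pwd)/ort
mkdir -p $ORT_HOME
```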

```bash
mkdir -p ort
cd ort
wget https://github.com/microsoft/onnxruntime/releases/download/v1.17.0/onnxruntime-linux-x64-gpu-1.17.0.tgz
tar xvzf onnxruntime-linux-x64-gpu-1.17.0.tgz
mv onnxruntime-linux-x64-gpu-1.17.0/include .
mv onnxruntime-linux-x64-gpu-1.17.0/lib .
```
* Install from release

These instructions are for the Linux GPU build of ONNX Runtime. Replace the archive with the build for your operating system and target of choice.

```bash
cd $ORT_HOME
wget https://github.com/microsoft/onnxruntime/releases/download/v1.17.0/onnxruntime-linux-x64-gpu-1.17.0.tgz
tar xvzf onnxruntime-linux-x64-gpu-1.17.0.tgz
mv onnxruntime-linux-x64-gpu-1.17.0/include .
mv onnxruntime-linux-x64-gpu-1.17.0/lib .
```
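
After extraction, `ORT_HOME` should contain `include` and `lib` folders; a quick optional check:

```bash
ls $ORT_HOME/include $ORT_HOME/lib
```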

* Or build from source

```bash
git clone https://github.com/microsoft/onnxruntime.git
cd onnxruntime
```

Create `include` and `lib` folders in the `ORT_HOME` directory:

```bash
mkdir $ORT_HOME/include
mkdir $ORT_HOME/lib
```

Build from source, then copy the API header and libraries into `ORT_HOME`.

On Windows

```cmd
build.bat --config RelWithDebInfo --build_shared_lib --skip_tests --parallel [--use_cuda]
copy include\onnxruntime\core\session\onnxruntime_c_api.h %ORT_HOME%\include
copy build\Windows\RelWithDebInfo\RelWithDebInfo\*.dll %ORT_HOME%\lib
```

On Linux

```bash
./build.sh --config RelWithDebInfo --build_shared_lib --skip_tests --parallel [--use_cuda]
cp include/onnxruntime/core/session/onnxruntime_c_api.h $ORT_HOME/include
cp build/Linux/RelWithDebInfo/libonnxruntime*.so* $ORT_HOME/lib
```

3. Build onnxruntime-genai

If you are building for CUDA, add the cuda_home argument.

```bash
cd ..
python build.py --cuda_home <path_to_cuda_home> --cudnn_home <path_to_cudnn_home>
python build.py [--cuda_home <path_to_cuda_home>]
```
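
For example, a CUDA build might be invoked as follows (a sketch; `/usr/local/cuda` is an assumed toolkit path, adjust to your installation):

```bash
# Build the Python wheel with CUDA support enabled
python build.py --cuda_home /usr/local/cuda
```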

4. Install Python wheel
@@ -81,49 +154,23 @@ ONNX models are run from a local folder, via a string supplied to the `Model()`
To source `microsoft/phi-2` optimized for your target, download and run the following script. You will need to be logged into HuggingFace via the CLI to run the script.
Install model builder dependencies.
```bash
wget https://raw.githubusercontent.com/microsoft/onnxruntime-genai/main/src/python/models/export.py
pip install numpy
pip install transformers
pip install torch
pip install onnx
pip install onnxruntime
```


Export int4 CPU version
```bash
huggingface-cli login --token <your HuggingFace token>
python export.py -m microsoft/phi-2 -p int4 -e cpu -o phi2-int4-cpu.onnx
python -m onnxruntime_genai.models.builder -m microsoft/phi-2 -p int4 -e cpu -o <model folder>
```
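
The resulting `<model folder>` is the path you then pass to `og.Model()` in the Python sample above.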



## Contributing