Merge branch 'main' into Cjian/py
# Conflicts:
#	.pipelines/pypl-publishing.yml
jchen351 committed Feb 27, 2024
2 parents 2e28669 + 0a76f21 commit 7442d06
Showing 71 changed files with 1,384 additions and 195,450 deletions.
9 changes: 7 additions & 2 deletions .github/workflows/win-cpu-arm64-build.yml
@@ -45,15 +45,20 @@ jobs:
cmake -G "Visual Studio 17 2022" -A arm64 . -DCMAKE_BUILD_TYPE=Release -DUSE_CUDA=OFF
cmake --build . --config Release --parallel
- name: Install the python wheel and test dependencies
- name: Install the Python Wheel and Test Dependencies
run: |
python -m pip install (Get-ChildItem ("wheel\*.whl"))
python -m pip install -r test\python\requirements.txt
- name: Run the python tests
- name: Run the Python Tests
run: |
python test/python/test_onnxruntime_genai.py --cwd "test\python" --test_models "test\test_models"
- name: Build the C# API and Run the C# Tests
run: |
cd test\csharp
dotnet test /p:NativeBuildOutputDir="..\..\Release\"
- name: Verify Build Artifacts
if: always()
run: |
7 changes: 6 additions & 1 deletion .github/workflows/win-cpu-x64-build.yml
@@ -62,10 +62,15 @@ jobs:
python -m pip install (Get-ChildItem ("wheel\*.whl"))
python -m pip install -r test\python\requirements.txt
- name: Run the python tests
- name: Run the Python Tests
run: |
python test/python/test_onnxruntime_genai.py --cwd "test\python" --test_models "test\test_models"
- name: Build the C# API and Run the C# Tests
run: |
cd test\csharp
dotnet test /p:NativeBuildOutputDir="..\..\Release\"
- name: Verify Build Artifacts
if: always()
run: |
13 changes: 11 additions & 2 deletions .github/workflows/win-gpu-x64-build.yml
@@ -54,15 +54,24 @@ jobs:
cmake -G "Visual Studio 17 2022" -A x64 -T cuda=${{ env.cuda_dir }}\\v${{ env.cuda_version }} .. -DCMAKE_BUILD_TYPE=Release -DUSE_CUDA=TRUE
cmake --build . --config Release --parallel
- name: Install the python wheel and test dependencies
- name: Install the Python Wheel and Test Dependencies
run: |
python -m pip install (Get-ChildItem ("build\wheel\*.whl"))
python -m pip install -r test\python\requirements.txt
- name: Run the python tests
- name: Run the Python Tests
run: |
python test/python/test_onnxruntime_genai.py --cwd "test\python" --test_models "test\test_models"
- name: Add CUDA to PATH
run: |
echo "${{ env.cuda_dir }}\\v${{ env.cuda_version }}\\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
- name: Build the C# API and run the C# tests
run: |
cd test\csharp
dotnet test /p:Configuration=Release /p:NativeBuildOutputDir="..\..\build\Release\"
- name: Verify Build Artifacts
if: always()
run: |
1 change: 1 addition & 0 deletions .gitignore
@@ -15,6 +15,7 @@ test/csharp/obj/
/*.csv
.idea
cache_dir
example-models
*.onnx
*.onnx.data

17 changes: 9 additions & 8 deletions CMakeLists.txt
@@ -72,11 +72,6 @@ if(USE_CUDA AND CMAKE_CUDA_COMPILER)
list(APPEND generator_srcs ${generator_cuda_srcs})
add_compile_definitions(USE_CUDA=1)
include_directories("${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}")
if(ENABLE_TESTS AND TEST_PHI2)
add_compile_definitions(TEST_PHI2=1)
else()
add_compile_definitions(TEST_PHI2=0)
endif()
elseif(USE_CUDA)
# USE_CUDA is true but cmake could not find the cuda compiler
message(FATAL_ERROR "USE_CUDA is ON but no cuda compiler was found.")
@@ -85,6 +80,12 @@ else()
list(REMOVE_ITEM generator_srcs ${generator_cuda_srcs})
endif()

if(ENABLE_TESTS AND TEST_PHI2)
add_compile_definitions(TEST_PHI2=1)
else()
add_compile_definitions(TEST_PHI2=0)
endif()

add_library(onnxruntime-genai SHARED ${generator_srcs})
add_library(onnxruntime-genai-static STATIC ${generator_srcs})
target_include_directories(onnxruntime-genai PRIVATE ${ORT_HEADER_DIR})
@@ -141,7 +142,7 @@ if(NOT EXISTS "${ORT_LIB_DIR}/${ONNXRUNTIME_LIB}")
message(FATAL_ERROR "Expected the ONNX Runtime library to be found at ${ORT_LIB_DIR}/${ONNXRUNTIME_LIB}. Actual: Not found.")
endif()
if(NOT EXISTS "${ORT_HEADER_DIR}/onnxruntime_c_api.h")
message(FATAL_ERROR "Expected the ONNX Runtime C API header to be found at "${ORT_HEADER_DIR}/onnxruntime_c_api.h". Actual: Not found.")
message(FATAL_ERROR "Expected the ONNX Runtime C API header to be found at \"${ORT_HEADER_DIR}/onnxruntime_c_api.h\". Actual: Not found.")
endif()
if(USE_CUDA AND NOT EXISTS "${ORT_LIB_DIR}/${ONNXRUNTIME_PROVIDERS_CUDA_LIB}")
message(FATAL_ERROR "Expected the ONNX Runtime providers cuda library to be found at ${ORT_LIB_DIR}/${ONNXRUNTIME_PROVIDERS_CUDA_LIB}. Actual: Not found.")
@@ -153,11 +154,11 @@ target_link_libraries(onnxruntime-genai PRIVATE ${ONNXRUNTIME_LIB})

if(USE_CUDA AND CMAKE_CUDA_COMPILER)
set_target_properties(onnxruntime-genai PROPERTIES LINKER_LANGUAGE CUDA)
target_link_libraries(onnxruntime-genai PRIVATE cublasLt cublas cudnn curand cufft cudart)
target_link_libraries(onnxruntime-genai PRIVATE cublasLt cublas curand cufft cudart)
# onnxruntime-genai-static is statically linked under Windows
if(NOT WIN32)
set_target_properties(onnxruntime-genai-static PROPERTIES LINKER_LANGUAGE CUDA)
target_link_libraries(onnxruntime-genai-static PRIVATE cublasLt cublas cudnn curand cufft cudart)
target_link_libraries(onnxruntime-genai-static PRIVATE cublasLt cublas curand cufft cudart)
endif()
endif()

135 changes: 91 additions & 44 deletions README.md
@@ -15,6 +15,7 @@ Users can call a high level `generate()` method, or run each iteration of the model
* Supported model architectures:
* Phi-2
* Llama
* Gemma
* GPT
* Supported targets:
* CPU
@@ -38,6 +39,40 @@ Users can call a high level `generate()` method, or run each iteration of the model
* Automatic model download and cache
* More model architectures

## Sample code for phi-2 in Python

Install onnxruntime-genai.

(Temporary) Build and install from source according to the instructions below.


```python
import onnxruntime_genai as og

# Note: device_type is not defined in the original snippet; og.DeviceType.CPU
# is an assumed value here (og.DeviceType.CUDA would select a GPU build).
device_type = og.DeviceType.CPU

model = og.Model('models/microsoft/phi-2', device_type)

tokenizer = model.create_tokenizer()

prompt = '''def print_prime(n):
    """
    Print all primes between 1 and n
    """'''

tokens = tokenizer.encode(prompt)

params = og.SearchParams(model)
params.max_length = 200
params.input_ids = tokens

output_tokens = model.generate(params)

text = tokenizer.decode(output_tokens)

print("Output:")
print(text)
```


## Build from source

This step requires `cmake` to be installed.
@@ -46,26 +81,64 @@

```bash
git clone https://github.com/microsoft/onnxruntime-genai
cd onnxruntime-genai
```

2. Install ONNX Runtime

These instructions are for the Linux GPU build of ONNX Runtime. Replace the archive with the build for your operating system and target of choice.
By default, the onnxruntime-genai build expects to find the ONNX Runtime headers and binaries in a folder called `ort` in the root directory of onnxruntime-genai. You can put the ONNX Runtime files in a different location and point the onnxruntime-genai build at it; these instructions refer to that location as `ORT_HOME`.
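
For example, a minimal sketch (assuming you are in the onnxruntime-genai root; `ORT_HOME` is just a shell variable naming whatever folder you pick):

```bash
# Default location: the `ort` folder under the onnxruntime-genai root.
# Any other path works, as long as the later commands use the same ORT_HOME.
export ORT_HOME=$(pwd)/ort
mkdir -p $ORT_HOME
```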

```bash
mkdir -p ort
cd ort
wget https://github.com/microsoft/onnxruntime/releases/download/v1.17.0/onnxruntime-linux-x64-gpu-1.17.0.tgz
tar xvzf onnxruntime-linux-x64-gpu-1.17.0.tgz
mv onnxruntime-linux-x64-gpu-1.17.0/include .
mv onnxruntime-linux-x64-gpu-1.17.0/lib .
```
* Install from release

These instructions are for the Linux GPU build of ONNX Runtime. Replace the archive with the build for your operating system and target of choice.

```bash
cd $ORT_HOME
wget https://github.com/microsoft/onnxruntime/releases/download/v1.17.0/onnxruntime-linux-x64-gpu-1.17.0.tgz
tar xvzf onnxruntime-linux-x64-gpu-1.17.0.tgz
mv onnxruntime-linux-x64-gpu-1.17.0/include .
mv onnxruntime-linux-x64-gpu-1.17.0/lib .
```
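
After extraction, `ORT_HOME` should contain `include` and `lib` folders; a quick optional check:

```bash
ls $ORT_HOME/include $ORT_HOME/lib
```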

* Or build from source

```bash
git clone https://github.com/microsoft/onnxruntime.git
cd onnxruntime
```

Create `include` and `lib` folders in the `ORT_HOME` directory:

```bash
mkdir $ORT_HOME/include
mkdir $ORT_HOME/lib
```

Build from source, then copy the API header and libraries into `ORT_HOME`.

On Windows

```cmd
build.bat --config RelWithDebInfo --build_shared_lib --skip_tests --parallel [--use_cuda]
copy include\onnxruntime\core\session\onnxruntime_c_api.h %ORT_HOME%\include
copy build\Windows\RelWithDebInfo\RelWithDebInfo\*.dll %ORT_HOME%\lib
```

On Linux

```bash
./build.sh --config RelWithDebInfo --build_shared_lib --skip_tests --parallel [--use_cuda]
cp include/onnxruntime/core/session/onnxruntime_c_api.h $ORT_HOME/include
cp build/Linux/RelWithDebInfo/libonnxruntime*.so* $ORT_HOME/lib
```

3. Build onnxruntime-genai

If you are building for CUDA, add the cuda_home argument.

```bash
cd ..
python build.py --cuda_home <path_to_cuda_home> --cudnn_home <path_to_cudnn_home>
python build.py [--cuda_home <path_to_cuda_home>]
```
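
For example, a CUDA build might be invoked as follows (a sketch; `/usr/local/cuda` is an assumed toolkit path, adjust to your installation):

```bash
# Build the Python wheel with CUDA support enabled
python build.py --cuda_home /usr/local/cuda
```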

4. Install Python wheel
@@ -81,49 +154,23 @@ ONNX models are run from a local folder, via a string supplied to the `Model()`
To source `microsoft/phi-2` optimized for your target, download and run the following script. You will need to be logged into HuggingFace via the CLI to run the script.
Install model builder dependencies.
```bash
wget https://raw.githubusercontent.com/microsoft/onnxruntime-genai/main/src/python/models/export.py
pip install numpy
pip install transformers
pip install torch
pip install onnx
pip install onnxruntime
```


Export int4 CPU version
```bash
huggingface-cli login --token <your HuggingFace token>
python export.py -m microsoft/phi-2 -p int4 -e cpu -o phi2-int4-cpu.onnx
python -m onnxruntime_genai.models.builder -m microsoft/phi-2 -p int4 -e cpu -o <model folder>
```
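
The resulting `<model folder>` is the path you then pass to `og.Model()` in the Python sample above.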



## Contributing