Merge branch 'refs/heads/main' into Cjian/gha-ort-nightlhy
# Conflicts:
#	.github/workflows/linux-cpu-x64-build.yml
#	.github/workflows/linux-gpu-x64-build.yml
#	.github/workflows/mac-cpu-arm64-build.yml
#	.github/workflows/win-cpu-x64-build.yml
#	.github/workflows/win-gpu-x64-build.yml
jchen351 committed Apr 19, 2024
2 parents 9315bac + 2a7a890 commit 1ef8525
Showing 92 changed files with 3,339 additions and 582 deletions.
6 changes: 3 additions & 3 deletions .github/workflows/linux-cpu-arm64-build.yml
@@ -4,9 +4,9 @@ concurrency:
   group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
   cancel-in-progress: true
 env:
-  ort_dir: "onnxruntime-linux-aarch64-1.17.1"
-  ort_zip: "onnxruntime-linux-aarch64-1.17.1.tgz"
-  ort_url: "https://github.com/microsoft/onnxruntime/releases/download/v1.17.1/onnxruntime-linux-aarch64-1.17.1.tgz"
+  ort_dir: "onnxruntime-linux-aarch64-1.17.3"
+  ort_zip: "onnxruntime-linux-aarch64-1.17.3.tgz"
+  ort_url: "https://github.com/microsoft/onnxruntime/releases/download/v1.17.3/onnxruntime-linux-aarch64-1.17.3.tgz"
 jobs:
   linux-cpu-arm64-build:
     runs-on: [ "self-hosted", "1ES.Pool=onnxruntime-genai-Ubuntu2004-ARM-CPU" ]
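The hunk above only bumps the pinned OnnxRuntime release from 1.17.1 to 1.17.3; a later workflow step (not shown here) downloads and unpacks the archive named by these env values. A minimal Python sketch of that fetch-and-extract, using the URL from this diff — the local paths and the use of the standard library are illustrative, not the workflow's actual step:

```python
import tarfile
import urllib.request

# Values mirrored from the env block in the diff above.
ort_zip = "onnxruntime-linux-aarch64-1.17.3.tgz"
ort_url = ("https://github.com/microsoft/onnxruntime/releases/download/"
           "v1.17.3/" + ort_zip)

# Download the release archive, then unpack it into the working directory.
urllib.request.urlretrieve(ort_url, ort_zip)
with tarfile.open(ort_zip, "r:gz") as tar:
    tar.extractall(".")  # yields ./onnxruntime-linux-aarch64-1.17.3/
```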
6 changes: 3 additions & 3 deletions .github/workflows/linux-cpu-x64-nightly-build.yml
@@ -12,9 +12,9 @@ concurrency:
   group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
   cancel-in-progress: true
 env:
-  ort_dir: "onnxruntime-linux-x64-1.17.1"
-  ort_zip: "onnxruntime-linux-x64-1.17.1.tgz"
-  ort_url: "https://github.com/microsoft/onnxruntime/releases/download/v1.17.1/onnxruntime-linux-x64-1.17.1.tgz"
+  ort_dir: "onnxruntime-linux-x64-1.17.3"
+  ort_zip: "onnxruntime-linux-x64-1.17.3.tgz"
+  ort_url: "https://github.com/microsoft/onnxruntime/releases/download/v1.17.3/onnxruntime-linux-x64-1.17.3.tgz"
 jobs:
   job:
     runs-on: [ "self-hosted", "1ES.Pool=onnxruntime-genai-Ubuntu2204-AMD-CPU" ]
6 changes: 3 additions & 3 deletions .github/workflows/win-cpu-arm64-build.yml
@@ -11,9 +11,9 @@ concurrency:
   group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
   cancel-in-progress: true
 env:
-  ort_dir: "onnxruntime-win-arm64-1.17.1"
+  ort_dir: "onnxruntime-win-arm64-1.17.3"
   ort_zip: "$(ort_dir).zip"
-  ort_url: "https://github.com/microsoft/onnxruntime/releases/download/v1.17.1/$(ort_zip)"
+  ort_url: "https://github.com/microsoft/onnxruntime/releases/download/v1.17.3/$(ort_zip)"
   binaryDir: 'build/cpu'

 jobs:
@@ -33,7 +33,7 @@ jobs:

       - name: Download OnnxRuntime
         run: |
-          $env:ort_url = "https://github.com/microsoft/onnxruntime/releases/download/v1.17.1/onnxruntime-win-arm64-1.17.1.zip"
+          $env:ort_url = "https://github.com/microsoft/onnxruntime/releases/download/v1.17.3/onnxruntime-win-arm64-1.17.3.zip"
           Invoke-WebRequest -Uri $env:ort_url -OutFile $env:ort_zip
       - name: Unzip OnnxRuntime
4 changes: 3 additions & 1 deletion .gitignore
@@ -22,4 +22,6 @@ examples/python/genai_models
 examples/python/hf_cache

 !test/test_models/hf-internal-testing/
-!test/test_models/hf-internal-testing/tiny-random-gpt2*/*.onnx
+!test/test_models/hf-internal-testing/tiny-random-gpt2*/*.onnx
+
+.ipynb_checkpoints/
2 changes: 1 addition & 1 deletion .pipelines/nuget-publishing.yml
@@ -23,7 +23,7 @@ parameters:
 - name: ort_version
   displayName: 'OnnxRuntime version'
   type: string
-  default: '1.17.1'
+  default: '1.17.3'

 - name: cuda_version
   displayName: 'CUDA version'
2 changes: 1 addition & 1 deletion .pipelines/pypl-publishing.yml
@@ -22,7 +22,7 @@ parameters:
 - name: ort_version
   displayName: 'OnnxRuntime version'
   type: string
-  default: '1.17.1'
+  default: '1.17.3'

 - name: cuda_version
   displayName: 'CUDA version'
3 changes: 3 additions & 0 deletions README.md
@@ -53,6 +53,7 @@ See full documentation at [https://onnxruntime.ai/docs/genai].
 1. Build the model
 ```shell
 python -m onnxruntime_genai.models.builder -m microsoft/phi-2 -e cpu -p int4 -o ./models/phi2
+# You can append --extra_options enable_cuda_graph=1 to build an onnx model that supports using cuda graph in ORT.
 ```

 2. Run inference
@@ -75,6 +76,8 @@ tokens = tokenizer.encode(prompt)

 params = og.GeneratorParams(model)
 params.set_search_options({"max_length":200})
+# Add the following line to enable cuda graph by passing the maximum batch size.
+# params.try_use_cuda_graph_with_max_batch_size(16)
 params.input_ids = tokens

 output_tokens = model.generate(params)
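Taken together, the two README additions wire CUDA graph support end to end: build the model with enable_cuda_graph=1, then opt in at generation time with try_use_cuda_graph_with_max_batch_size. A sketch of the resulting script, assuming a model exported to ./models/phi2 as in the build step (the prompt and surrounding boilerplate are illustrative):

```python
import onnxruntime_genai as og

model = og.Model("./models/phi2")  # built with --extra_options enable_cuda_graph=1
tokenizer = og.Tokenizer(model)
tokens = tokenizer.encode("What is ONNX Runtime?")

params = og.GeneratorParams(model)
params.set_search_options({"max_length": 200})
params.try_use_cuda_graph_with_max_batch_size(16)  # opt in, per the README note
params.input_ids = tokens

output_tokens = model.generate(params)
print(tokenizer.decode(output_tokens))
```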
2 changes: 1 addition & 1 deletion VERSION_INFO
@@ -1 +1 @@
-0.1.0rc4
+0.2.0-dev
6 changes: 3 additions & 3 deletions benchmark/python/benchmark_e2e.py
@@ -18,7 +18,7 @@ def generate_prompt(model, tokenizer, prompt_length) -> str:
     prompt = "a"
     tokens = tokenizer.encode(prompt)
     params=og.GeneratorParams(model)
-    params.set_search_options({"do_sample":True, "top_k":5, "temperature":temperature, "max_length":prompt_length, "min_length":prompt_length+1})
+    params.set_search_options(do_sample=True, top_k=5, temperature=temperature, max_length=prompt_length, min_length=prompt_length+1)
     params.input_ids = tokens
     generator=og.Generator(model, params)
     while not generator.is_done():
@@ -68,7 +68,7 @@ def main(args):

     params = og.GeneratorParams(model)
     params.input_ids = tokens
-    params.set_search_options({"do_sample":True, "top_k":args.top_k, "top_p":args.top_p, "temperature":temperature, "max_length":max_length, "min_length":max_length})
+    params.set_search_options(do_sample=True, top_k=args.top_k, top_p=args.top_p, temperature=temperature, max_length=max_length, min_length=max_length)

     if args.verbose: print("Running warmup runs...")
     for _ in tqdm(range(args.warmup)):
@@ -99,7 +99,7 @@ def main(args):
     # Prepare run
     params = og.GeneratorParams(model)
     params.input_ids = tokens
-    params.set_search_options({"max_length":max_length, "min_length":max_length})
+    params.set_search_options(max_length=max_length, min_length=max_length)
     generator = og.Generator(model, params)

     # Measure prompt processing
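All three hunks in benchmark_e2e.py make the same mechanical change: set_search_options now takes keyword arguments rather than a single dict. A sketch of the new calling convention around the og.Generator loop this file uses — the per-step compute_logits/generate_next_token calls and the get_sequence accessor are assumptions about the streaming API, since the loop body is elided in this diff:

```python
import onnxruntime_genai as og

def run(model, tokenizer, prompt, max_length=200):
    params = og.GeneratorParams(model)
    # New style: keyword arguments instead of a dict literal.
    params.set_search_options(do_sample=True, top_k=5, temperature=1.0,
                              max_length=max_length)
    params.input_ids = tokenizer.encode(prompt)

    generator = og.Generator(model, params)
    while not generator.is_done():        # same loop as generate_prompt() above
        generator.compute_logits()        # assumed step-wise API
        generator.generate_next_token()   # assumed step-wise API
    return generator.get_sequence(0)      # assumed accessor for the tokens
```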
6 changes: 5 additions & 1 deletion build.py
@@ -20,6 +20,10 @@ def is_linux():
     """Check if the current platform is Linux."""
     return sys.platform.startswith("linux")

+def is_mac():
+    """Check if the current platform is MacOS"""
+    return sys.platform.startswith("darwin")
+

 def platform():
     """Get the current platform."""
@@ -110,7 +114,7 @@ def build(
     Args:
         skip_wheel: Whether to skip building the Python wheel. Defaults to False.
     """
-    if not is_windows() and not is_linux():
+    if not is_windows() and not is_linux() and not is_mac():
         raise OSError(f"Unsupported platform {platform()}.")

     if cuda_home and not use_cuda:
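The build.py change extends host support to macOS: a new is_mac() helper mirroring is_linux(), plus a third clause in the platform guard. Condensed into a self-contained sketch — the is_windows body is assumed to mirror the other helpers, as it is not shown in this diff:

```python
import sys

def is_windows():
    """Check if the current platform is Windows (assumed to mirror the others)."""
    return sys.platform.startswith("win")

def is_linux():
    """Check if the current platform is Linux."""
    return sys.platform.startswith("linux")

def is_mac():
    """Check if the current platform is macOS."""
    return sys.platform.startswith("darwin")

# The guard from build(): reject any host that is none of the three.
if not is_windows() and not is_linux() and not is_mac():
    raise OSError(f"Unsupported platform {sys.platform}.")
```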