diff --git a/.github/workflows/linux-cpu-arm64-build.yml b/.github/workflows/linux-cpu-arm64-build.yml
index 5018bdbb6..3b55c3fe5 100644
--- a/.github/workflows/linux-cpu-arm64-build.yml
+++ b/.github/workflows/linux-cpu-arm64-build.yml
@@ -4,9 +4,9 @@ concurrency:
   group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
   cancel-in-progress: true
 env:
-  ort_dir: "onnxruntime-linux-aarch64-1.17.1"
-  ort_zip: "onnxruntime-linux-aarch64-1.17.1.tgz"
-  ort_url: "https://github.com/microsoft/onnxruntime/releases/download/v1.17.1/onnxruntime-linux-aarch64-1.17.1.tgz"
+  ort_dir: "onnxruntime-linux-aarch64-1.17.3"
+  ort_zip: "onnxruntime-linux-aarch64-1.17.3.tgz"
+  ort_url: "https://github.com/microsoft/onnxruntime/releases/download/v1.17.3/onnxruntime-linux-aarch64-1.17.3.tgz"
 jobs:
   linux-cpu-arm64-build:
     runs-on: [ "self-hosted", "1ES.Pool=onnxruntime-genai-Ubuntu2004-ARM-CPU" ]
diff --git a/.github/workflows/linux-cpu-x64-build.yml b/.github/workflows/linux-cpu-x64-build.yml
index da202e19c..2e1c03aab 100644
--- a/.github/workflows/linux-cpu-x64-build.yml
+++ b/.github/workflows/linux-cpu-x64-build.yml
@@ -4,9 +4,9 @@ concurrency:
   group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
   cancel-in-progress: true
 env:
-  ort_dir: "onnxruntime-linux-x64-1.17.1"
-  ort_zip: "onnxruntime-linux-x64-1.17.1.tgz"
-  ort_url: "https://github.com/microsoft/onnxruntime/releases/download/v1.17.1/onnxruntime-linux-x64-1.17.1.tgz"
+  ort_dir: "onnxruntime-linux-x64-1.17.3"
+  ort_zip: "onnxruntime-linux-x64-1.17.3.tgz"
+  ort_url: "https://github.com/microsoft/onnxruntime/releases/download/v1.17.3/onnxruntime-linux-x64-1.17.3.tgz"
 
 jobs:
   linux_cpu_x64:
diff --git a/.github/workflows/linux-cpu-x64-nightly-build.yml b/.github/workflows/linux-cpu-x64-nightly-build.yml
index c7a0234b1..fd4fb20fc 100644
--- a/.github/workflows/linux-cpu-x64-nightly-build.yml
+++ b/.github/workflows/linux-cpu-x64-nightly-build.yml
@@ -12,9 +12,9 @@ concurrency:
   group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
   cancel-in-progress: true
 env:
-  ort_dir: "onnxruntime-linux-x64-1.17.1"
-  ort_zip: "onnxruntime-linux-x64-1.17.1.tgz"
-  ort_url: "https://github.com/microsoft/onnxruntime/releases/download/v1.17.1/onnxruntime-linux-x64-1.17.1.tgz"
+  ort_dir: "onnxruntime-linux-x64-1.17.3"
+  ort_zip: "onnxruntime-linux-x64-1.17.3.tgz"
+  ort_url: "https://github.com/microsoft/onnxruntime/releases/download/v1.17.3/onnxruntime-linux-x64-1.17.3.tgz"
 jobs:
   job:
     runs-on: [ "self-hosted", "1ES.Pool=onnxruntime-genai-Ubuntu2204-AMD-CPU" ]
diff --git a/.github/workflows/linux-gpu-x64-build.yml b/.github/workflows/linux-gpu-x64-build.yml
index c4e4c372a..182ccb9f2 100644
--- a/.github/workflows/linux-gpu-x64-build.yml
+++ b/.github/workflows/linux-gpu-x64-build.yml
@@ -6,9 +6,9 @@ concurrency:
   cancel-in-progress: true
 
 env:
-  ort_dir: "onnxruntime-linux-x64-gpu-1.17.1"
-  ort_zip: "onnxruntime-linux-x64-gpu-1.17.1.tgz"
-  ort_url: "https://github.com/microsoft/onnxruntime/releases/download/v1.17.1/onnxruntime-linux-x64-gpu-1.17.1.tgz"
+  ort_dir: "onnxruntime-linux-x64-gpu-1.17.3"
+  ort_zip: "onnxruntime-linux-x64-gpu-1.17.3.tgz"
+  ort_url: "https://github.com/microsoft/onnxruntime/releases/download/v1.17.3/onnxruntime-linux-x64-gpu-1.17.3.tgz"
 
 jobs:
   linux-gpu-x64-build:
diff --git a/.github/workflows/mac-cpu-arm64-build.yml b/.github/workflows/mac-cpu-arm64-build.yml
index d757370f4..9cb9cdc46 100644
--- a/.github/workflows/mac-cpu-arm64-build.yml
+++ b/.github/workflows/mac-cpu-arm64-build.yml
@@ -4,9 +4,9 @@ concurrency:
   group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
   cancel-in-progress: true
 env:
-  ort_dir: "onnxruntime-osx-arm64-1.17.1"
-  ort_zip: "onnxruntime-osx-arm64-1.17.1.tgz"
-  ort_url: "https://github.com/microsoft/onnxruntime/releases/download/v1.17.1/onnxruntime-osx-arm64-1.17.1.tgz"
+  ort_dir: "onnxruntime-osx-arm64-1.17.3"
+  ort_zip: "onnxruntime-osx-arm64-1.17.3.tgz"
+  ort_url: "https://github.com/microsoft/onnxruntime/releases/download/v1.17.3/onnxruntime-osx-arm64-1.17.3.tgz"
 jobs:
   mac-cpu-arm64-build:
     runs-on: macos-latest
diff --git a/.github/workflows/win-cpu-arm64-build.yml b/.github/workflows/win-cpu-arm64-build.yml
index 7c64ba8ff..916af3009 100644
--- a/.github/workflows/win-cpu-arm64-build.yml
+++ b/.github/workflows/win-cpu-arm64-build.yml
@@ -11,9 +11,9 @@ concurrency:
   group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
   cancel-in-progress: true
 env:
-  ort_dir: "onnxruntime-win-arm64-1.17.1"
+  ort_dir: "onnxruntime-win-arm64-1.17.3"
   ort_zip: "$(ort_dir).zip"
-  ort_url: "https://github.com/microsoft/onnxruntime/releases/download/v1.17.1/$(ort_zip)"
+  ort_url: "https://github.com/microsoft/onnxruntime/releases/download/v1.17.3/$(ort_zip)"
   binaryDir: 'build/cpu'
 
 jobs:
@@ -33,7 +33,7 @@ jobs:
 
     - name: Download OnnxRuntime
       run: |
-        $env:ort_url = "https://github.com/microsoft/onnxruntime/releases/download/v1.17.1/onnxruntime-win-arm64-1.17.1.zip"
+        $env:ort_url = "https://github.com/microsoft/onnxruntime/releases/download/v1.17.3/onnxruntime-win-arm64-1.17.3.zip"
         Invoke-WebRequest -Uri $env:ort_url -OutFile $env:ort_zip
 
     - name: Unzip OnnxRuntime
diff --git a/.github/workflows/win-cpu-x64-build.yml b/.github/workflows/win-cpu-x64-build.yml
index f13f3c2c8..ca0bb6b5b 100644
--- a/.github/workflows/win-cpu-x64-build.yml
+++ b/.github/workflows/win-cpu-x64-build.yml
@@ -11,9 +11,9 @@ concurrency:
   group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
   cancel-in-progress: true
 env:
-  ort_dir: "onnxruntime-win-x64-1.17.1"
+  ort_dir: "onnxruntime-win-x64-1.17.3"
   ort_zip: "$(ort_dir).zip"
-  ort_url: "https://github.com/microsoft/onnxruntime/releases/download/v1.17.1/$(ort_zip)"
+  ort_url: "https://github.com/microsoft/onnxruntime/releases/download/v1.17.3/$(ort_zip)"
   binaryDir: 'build/cpu'
 
 jobs:
@@ -35,7 +35,7 @@ jobs:
 
     - name: Download OnnxRuntime
       run: |
-        $env:ort_url = "https://github.com/microsoft/onnxruntime/releases/download/v1.17.1/onnxruntime-win-x64-1.17.1.zip"
+        $env:ort_url = "https://github.com/microsoft/onnxruntime/releases/download/v1.17.3/onnxruntime-win-x64-1.17.3.zip"
         Invoke-WebRequest -Uri $env:ort_url -OutFile $env:ort_zip
 
     - name: Unzip OnnxRuntime
diff --git a/.github/workflows/win-gpu-x64-build.yml b/.github/workflows/win-gpu-x64-build.yml
index a3f1d338b..430001e62 100644
--- a/.github/workflows/win-gpu-x64-build.yml
+++ b/.github/workflows/win-gpu-x64-build.yml
@@ -8,9 +8,9 @@ concurrency:
 env:
   AZCOPY_AUTO_LOGIN_TYPE: MSI
   AZCOPY_MSI_CLIENT_ID: 63b63039-6328-442f-954b-5a64d124e5b4
-  ort_dir: "onnxruntime-win-x64-gpu-1.17.1"
-  ort_zip: "onnxruntime-win-x64-gpu-1.17.1.zip"
-  ort_url: "https://github.com/microsoft/onnxruntime/releases/download/v1.17.1/onnxruntime-win-x64-gpu-1.17.1.zip"
+  ort_dir: "onnxruntime-win-x64-gpu-1.17.3"
+  ort_zip: "onnxruntime-win-x64-gpu-1.17.3.zip"
+  ort_url: "https://github.com/microsoft/onnxruntime/releases/download/v1.17.3/onnxruntime-win-x64-gpu-1.17.3.zip"
   cuda_dir: "${{ github.workspace }}\\cuda_sdk"
   cuda_version: "11.8"
   CUDA_PATH: ${{ github.workspace }}\\cuda_sdk\\v11.8
diff --git a/.gitignore b/.gitignore
index 60b60827f..1ff9a0f9c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -10,7 +10,7 @@
 /test/test_models/*
 /cache_models
 /onnxruntime-linux-x64-*
-/*.csv
+*.csv
 .idea
 cache_dir
 example-models
@@ -22,4 +22,6 @@
 examples/python/genai_models
 examples/python/hf_cache
 !test/test_models/hf-internal-testing/
-!test/test_models/hf-internal-testing/tiny-random-gpt2*/*.onnx
\ No newline at end of file
+!test/test_models/hf-internal-testing/tiny-random-gpt2*/*.onnx
+
+.ipynb_checkpoints/
\ No newline at end of file
diff --git a/.pipelines/nuget-publishing.yml b/.pipelines/nuget-publishing.yml
index 451411fd2..b6fff7111 100644
--- a/.pipelines/nuget-publishing.yml
+++ b/.pipelines/nuget-publishing.yml
@@ -23,7 +23,7 @@ parameters:
 - name: ort_version
   displayName: 'OnnxRuntime version'
   type: string
-  default: '1.17.1'
+  default: '1.17.3'
 
 - name: cuda_version
   displayName: 'CUDA version'
diff --git a/.pipelines/pypl-publishing.yml b/.pipelines/pypl-publishing.yml
index edce0b37d..1069ede44 100644
--- a/.pipelines/pypl-publishing.yml
+++ b/.pipelines/pypl-publishing.yml
@@ -22,7 +22,7 @@ parameters:
 - name: ort_version
   displayName: 'OnnxRuntime version'
   type: string
-  default: '1.17.1'
+  default: '1.17.3'
 
 - name: cuda_version
   displayName: 'CUDA version'
diff --git a/CMakeLists.txt b/CMakeLists.txt
index de12d6482..b325c2763 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -44,6 +44,11 @@ if(USE_CUDA AND CMAKE_CUDA_COMPILER AND CMAKE_BUILD_TYPE STREQUAL "Debug")
   add_compile_definitions(_DEBUG=1)
 endif()
 
+if(MSVC)
+  # set updated value for __cplusplus macro instead of 199711L
+  add_compile_options($<$<COMPILE_LANGUAGE:CXX>:/Zc:__cplusplus>)
+endif()
+
 message(STATUS "Adding source files")
 
 file(GLOB generator_srcs CONFIGURE_DEPENDS
@@ -127,6 +132,11 @@ else()
   set(ONNXRUNTIME_EXTENSIONS_LIB "tfmtok_c.so")
 endif()
 
+file(GLOB onnxruntime_libs "${ORT_LIB_DIR}/${ONNXRUNTIME_FILES}")
+if(USE_DML)
+  list(APPEND onnxruntime_libs "${ORT_LIB_DIR}/DirectML.dll")
+endif()
+
 if(NO_TOKENIZEROOT)
   add_compile_definitions(NO_TOKENIZER=1)
   message("----------------Tokenizer Disabled------------------")
@@ -148,6 +158,11 @@ if(ENABLE_PYTHON)
   message("------------------Enabling Python Wheel------------------")
 endif()
 
+if(ENABLE_MODEL_BENCHMARK)
+  add_subdirectory("${CMAKE_SOURCE_DIR}/benchmark/c")
+  message("------------------Enabling model benchmark------------------")
+endif()
+
 if(NOT EXISTS "${ORT_LIB_DIR}/${ONNXRUNTIME_LIB}")
   message(FATAL_ERROR "Expected the ONNX Runtime library to be found at ${ORT_LIB_DIR}/${ONNXRUNTIME_LIB}. Actual: Not found.")
 endif()
@@ -158,7 +173,6 @@ if(USE_CUDA AND NOT EXISTS "${ORT_LIB_DIR}/${ONNXRUNTIME_PROVIDERS_CUDA_LIB}")
   message(FATAL_ERROR "Expected the ONNX Runtime providers cuda library to be found at ${ORT_LIB_DIR}/${ONNXRUNTIME_PROVIDERS_CUDA_LIB}. Actual: Not found.")
 endif()
 
-file(GLOB onnxruntime_libs "${ORT_LIB_DIR}/${ONNXRUNTIME_FILES}")
 target_link_directories(onnxruntime-genai PRIVATE ${ORT_LIB_DIR})
 target_link_libraries(onnxruntime-genai PRIVATE ${ONNXRUNTIME_LIB})
 
@@ -182,7 +196,6 @@ if(MSVC)
 endif()
 
 # Copy the onnxruntime binaries into the build folder so it's found on launch
-file(GLOB onnxruntime_libs "${ORT_LIB_DIR}/${ONNXRUNTIME_FILES}")
 foreach(DLL_FILE ${onnxruntime_libs})
   add_custom_command(
     TARGET onnxruntime-genai POST_BUILD
diff --git a/README.md b/README.md
index b572a94fd..0e8b42566 100644
--- a/README.md
+++ b/README.md
@@ -50,10 +50,19 @@ See full documentation at [https://onnxruntime.ai/docs/genai].
 [Install](https://onnxruntime.ai/docs/genai/howto/install) the onnxruntime-genai Python package.
 
+1. Build the model
+```shell
+python -m onnxruntime_genai.models.builder -m microsoft/phi-2 -e cpu -p int4 -o ./models/phi2
+```
+
+2. Run inference
 ```python
+import os
 import onnxruntime_genai as og
 
-model = og.Model(f'models/microsoft/phi-2')
+model_path = os.path.abspath("./models/phi2")
+
+model = og.Model(model_path)
 
 tokenizer = og.Tokenizer(model)
 
@@ -64,7 +73,7 @@ prompt = '''def print_prime(n):
 
 tokens = tokenizer.encode(prompt)
 
-params = og.SearchParams(model)
+params = og.GeneratorParams(model)
 params.set_search_options({"max_length":200})
 params.input_ids = tokens
 
diff --git a/VERSION_INFO b/VERSION_INFO
index 3e2177af6..b4f09dd42 100644
--- a/VERSION_INFO
+++ b/VERSION_INFO
@@ -1 +1 @@
-0.1.0rc4
\ No newline at end of file
+0.2.0-dev
\ No newline at end of file
diff --git a/benchmark/c/CMakeLists.txt b/benchmark/c/CMakeLists.txt
new file mode 100644
index 000000000..0035f3e5e
--- /dev/null
+++ b/benchmark/c/CMakeLists.txt
@@ -0,0 +1,25 @@
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+
+set(model_benchmark_srcs
+  ${CMAKE_CURRENT_SOURCE_DIR}/main.cpp
+  ${CMAKE_CURRENT_SOURCE_DIR}/options.h
+  ${CMAKE_CURRENT_SOURCE_DIR}/options.cpp
+)
+
+add_executable(model_benchmark ${model_benchmark_srcs})
+
+target_include_directories(model_benchmark PRIVATE
+  ${CMAKE_CURRENT_SOURCE_DIR}
+  ${CMAKE_SOURCE_DIR}/src  # directory containing the ort_genai headers
+)
+
+target_link_libraries(model_benchmark PRIVATE onnxruntime-genai-static ${ONNXRUNTIME_LIB})
+
+target_link_directories(model_benchmark PRIVATE ${ORT_LIB_DIR})
+
+add_custom_command(TARGET model_benchmark POST_BUILD
+  COMMAND ${CMAKE_COMMAND} -E copy_if_different ${onnxruntime_libs} $<TARGET_FILE_DIR:model_benchmark>
+)
+
+source_group(TREE ${CMAKE_CURRENT_SOURCE_DIR} FILES ${model_benchmark_srcs})
diff --git a/benchmark/c/main.cpp b/benchmark/c/main.cpp
new file mode 100644
index 000000000..0a6840c42
--- /dev/null
+++ b/benchmark/c/main.cpp
@@ -0,0 +1,242 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#include <algorithm>
+#include <chrono>
+#include <cmath>
+#include <iostream>
+#include <numeric>
+#include <stdexcept>
+#include <string>
+#include <string_view>
+#include <vector>
+
+#include "ort_genai.h"
+
+#include "options.h"
+
+namespace {
+
+using Clock = std::chrono::steady_clock;
+
+using Duration = Clock::duration;
+using DurationFp = std::chrono::duration<float, Duration::period>;
+
+class Timing {
+ public:
+  Timing(const Timing&) = delete;
+  Timing& operator=(const Timing&) = delete;
+
+  Timing(std::vector<Duration>& measurements)
+      : measurements_{measurements}, start_{Clock::now()} {
+  }
+
+  ~Timing() {
+    const auto measurement = Clock::now() - start_;
+    measurements_.push_back(measurement);
+  }
+
+ private:
+  std::vector<Duration>& measurements_;
+  const Clock::time_point start_;
+};
+
+struct Statistics {
+  DurationFp average{};
+  DurationFp stddev{};
+  DurationFp p50{};
+  DurationFp p90{};
+  DurationFp p99{};
+  size_t n{};
+};
+
+Statistics ComputeStats(const std::vector<Duration>& measurements) {
+  Statistics stats{};
+  if (measurements.empty()) {
+    return stats;
+  }
+
+  stats.n = measurements.size();
+
+  const auto sum = std::accumulate(measurements.begin(), measurements.end(), Duration{0});
+  stats.average = DurationFp{sum} / stats.n;
+
+  std::vector<Duration> sorted = measurements;
+  std::sort(sorted.begin(), sorted.end());
+
+  stats.p50 = sorted[stats.n * 0.5];
+  stats.p90 = sorted[stats.n * 0.9];
+  stats.p99 = sorted[stats.n * 0.99];
+
+  if (stats.n > 1) {
+    const float variance =
+        std::accumulate(
+            measurements.begin(), measurements.end(),
+            0.0f,
+            [mean = stats.average.count()](float accumulator, const Duration& m) -> float {
+              const float distance_from_mean = m.count() - mean;
+              return accumulator + distance_from_mean * distance_from_mean;
+            }) /
+        (stats.n - 1);
+
+    const float stddev = std::sqrt(variance);
+    stats.stddev = DurationFp{stddev};
+  }
+
+  return stats;
+}
+
+void WritePerTokenStats(std::string_view label,
+                        const Statistics& stats,
+                        const size_t tokens_per_measurement) {
+  using MicrosecondsFp = std::chrono::duration<float, std::micro>;
+  const auto avg_us = MicrosecondsFp{stats.average};
+  std::cout << label << ":"
+            << "\n\tavg (us): " << avg_us.count()
+            << "\n\tavg (tokens/s): " << 1.0e6f / avg_us.count() * tokens_per_measurement
+            << "\n\tp50 (us): " << MicrosecondsFp{stats.p50}.count()
+            << "\n\tstddev (us): " << MicrosecondsFp{stats.stddev}.count()
+            << "\n\tn: " << stats.n << " * " << tokens_per_measurement << " token(s)"
+            << "\n";
+}
+
+void WriteE2EStats(std::string_view label,
+                   const Statistics& stats) {
+  using MillisecondsFp = std::chrono::duration<float, std::milli>;
+  std::cout << label << ":"
+            << "\n\tavg (ms): " << MillisecondsFp{stats.average}.count()
+            << "\n\tp50 (ms): " << MillisecondsFp{stats.p50}.count()
+            << "\n\tstddev (ms): " << MillisecondsFp{stats.stddev}.count()
+            << "\n\tn: " << stats.n
+            << "\n";
+}
+
+std::string GeneratePrompt(size_t num_prompt_tokens, const OgaModel& model, const OgaTokenizer& tokenizer) {
+  const char* const base_prompt = "A";
+  auto base_prompt_sequences = OgaSequences::Create();
+
+  tokenizer.Encode(base_prompt, *base_prompt_sequences);
+
+  auto params = OgaGeneratorParams::Create(model);
+  params->SetSearchOption("max_length", num_prompt_tokens);
+  params->SetSearchOption("min_length", num_prompt_tokens);
+  params->SetInputSequences(*base_prompt_sequences);
+
+  auto output_sequences = model.Generate(*params);
+  const auto output_sequence_length = output_sequences->SequenceCount(0);
+  const auto* output_sequence_data = output_sequences->SequenceData(0);
+  return std::string{tokenizer.Decode(output_sequence_data, output_sequence_length)};
+}
+
+void RunBenchmark(const benchmark::Options& opts) {
+  auto model = OgaModel::Create(opts.model_path.c_str());
+  auto tokenizer = OgaTokenizer::Create(*model);
+
+  const std::string prompt = GeneratePrompt(opts.num_prompt_tokens, *model, *tokenizer);
+  auto prompt_sequences = OgaSequences::Create();
+
+  if (opts.batch_size < 1) {
+    throw std::runtime_error("Batch size must be at least 1.");
+  }
+
+  for (size_t i = 0; i < opts.batch_size; ++i) {
+    tokenizer->Encode(prompt.c_str(), *prompt_sequences);
+  }
+
+  const size_t num_prompt_tokens = prompt_sequences->SequenceCount(0);
+  const size_t num_tokens = num_prompt_tokens + opts.num_tokens_to_generate;
+
+  auto make_generator_params = [&] {
+    auto params = OgaGeneratorParams::Create(*model);
+    params->SetSearchOption("max_length", num_tokens);
+    params->SetSearchOption("min_length", num_tokens);
+    params->SetInputSequences(*prompt_sequences);
+    return params;
+  };
+
+  const auto generator_params = make_generator_params();
+
+  // warmup
+  if (opts.verbose) std::cout << "Running warmup iterations (" << opts.num_warmup_iterations << ")...\n";
+  for (size_t i = 0; i < opts.num_warmup_iterations; ++i) {
+    auto output_sequences = model->Generate(*generator_params);
+
+    if (opts.verbose && i == 0) {
+      // show prompt and output on first iteration
+      std::cout << "Prompt:\n\t" << prompt << "\n";
+      const auto output_sequence_length = output_sequences->SequenceCount(0);
+      const auto* output_sequence_data = output_sequences->SequenceData(0);
+      const auto output = tokenizer->Decode(output_sequence_data, output_sequence_length);
+      std::cout << "Output:\n\t" << output << "\n";
+    }
+  }
+
+  std::vector<Duration> e2e_gen_times, prompt_processing_times, token_gen_times, sampling_times;
+  // note: be sure to reserve enough to avoid vector reallocations in the measured code
+  e2e_gen_times.reserve(opts.num_iterations);
+  prompt_processing_times.reserve(opts.num_iterations);
+  token_gen_times.reserve(opts.num_iterations * (opts.num_tokens_to_generate - 1));
+  sampling_times.reserve(opts.num_iterations * opts.num_tokens_to_generate);
+
+  if (opts.verbose) std::cout << "Running iterations (" << opts.num_iterations << ")...\n";
+  for (size_t i = 0; i < opts.num_iterations; ++i) {
+    auto generator = OgaGenerator::Create(*model, *generator_params);
+
+    {
+      Timing e2e_gen_timing{e2e_gen_times};
+
+      {
+        Timing prompt_processing_timing{prompt_processing_times};
+        generator->ComputeLogits();
+      }
+
+      {
+        Timing sampling_timing{sampling_times};
+        generator->GenerateNextToken();
+      }
+
+      while (!generator->IsDone()) {
+        {
+          Timing token_gen_timing{token_gen_times};
+          generator->ComputeLogits();
+        }
+
+        {
+          Timing sampling_timing{sampling_times};
+          generator->GenerateNextToken();
+        }
+      }
+    }
+  }
+
+  {
+    std::cout << "Batch size: " << opts.batch_size
+              << ", prompt tokens: " << num_prompt_tokens
+              << ", tokens to generate: " << opts.num_tokens_to_generate
+              << "\n";
+
+    const auto e2e_gen_stats = ComputeStats(e2e_gen_times);
+    const auto prompt_processing_stats = ComputeStats(prompt_processing_times);
+    const auto token_gen_stats = ComputeStats(token_gen_times);
+    const auto sampling_stats = ComputeStats(sampling_times);
+
+    WritePerTokenStats("Prompt processing (time to first token)",
+                       prompt_processing_stats, opts.batch_size * num_prompt_tokens);
+    WritePerTokenStats("Token generation", token_gen_stats, opts.batch_size);
+    WritePerTokenStats("Token sampling", sampling_stats, opts.batch_size);
+    WriteE2EStats("E2E generation (entire generation loop)", e2e_gen_stats);
+  }
+}
+
+}  // namespace
+
+int main(int argc, char** argv) {
+  try {
+    const auto opts = benchmark::ParseOptionsFromCommandLine(argc, argv);
+    RunBenchmark(opts);
+    return 0;
+  } catch (const std::exception& e) {
+    std::cerr << "Exception: " << e.what() << "\n";
+    return 1;
+  }
+}
diff --git a/benchmark/c/options.cpp b/benchmark/c/options.cpp
new file mode 100644
index 000000000..7047a4466
--- /dev/null
+++ b/benchmark/c/options.cpp
@@ -0,0 +1,110 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#include "options.h"
+
+#include <charconv>
+#include <cstdlib>
+#include <iostream>
+#include <sstream>
+#include <stdexcept>
+#include <string>
+#include <string_view>
+#include <system_error>
+
+namespace benchmark {
+
+namespace {
+
+[[noreturn]] void PrintHelpAndExit(const char* program_name, int exit_code) {
+  Options defaults{};
+  std::ostringstream s;
+
+  s << "Usage: " << program_name << " -i <model_path>\n"
+    << "  Options:\n"
+    << "    -i,--input_folder <path>\n"
+    << "      Path to the ONNX model directory to benchmark, compatible with onnxruntime-genai.\n"
+    << "    -b,--batch_size <number>\n"
+    << "      Number of sequences to generate in parallel. Default: " << defaults.batch_size << "\n"
+    << "    -l,--prompt_length <number>\n"
+    << "      Number of tokens in the prompt. Default: " << defaults.num_prompt_tokens << "\n"
+    << "    -g,--generation_length <number>\n"
+    << "      Number of tokens to generate. Default: " << defaults.num_tokens_to_generate << "\n"
+    << "    -r,--repetitions <number>\n"
+    << "      Number of times to repeat the benchmark. Default: " << defaults.num_iterations << "\n"
+    << "    -w,--warmup <number>\n"
+    << "      Number of warmup runs before benchmarking. Default: " << defaults.num_warmup_iterations << "\n"
+    << "    -v,--verbose\n"
+    << "      Show more informational output.\n"
+    << "    -h,--help\n"
+    << "      Show this help message and exit.\n";
+
+  std::cerr << s.str();
+  std::exit(exit_code);
+}
+
+template <typename T>
+T ParseNumber(std::string_view s) {
+  T n;
+  const auto *s_begin = s.data(), *s_end = s.data() + s.size();
+  const auto [ptr, ec] = std::from_chars(s_begin, s_end, n);
+  if (ec != std::errc{} || ptr != s_end) {
+    throw std::runtime_error(std::string{"Failed to parse option value as number: "}.append(s));
+  }
+  return n;
+}
+
+void VerifyOptions(const Options& opts) {
+  if (opts.model_path.empty()) {
+    throw std::runtime_error("ONNX model directory path must be provided.");
+  }
+}
+
+}  // namespace
+
+Options ParseOptionsFromCommandLine(int argc, const char* const* argv) {
+  const char* const program_name = argc > 0 ? argv[0] : "model_benchmark";
argv[0] : "model_benchmark"; + try { + Options opts{}; + + auto next_arg = [argc, argv](int& idx) { + if (idx + 1 >= argc) { + throw std::runtime_error("Option value not provided."); + } + return std::string_view{argv[++idx]}; + }; + + for (int i = 1; i < argc; ++i) { + std::string_view arg{argv[i]}; + + if (arg == "-i" || arg == "--input_folder") { + opts.model_path = next_arg(i); + } else if (arg == "-b" || arg == "--batch_size") { + opts.batch_size = ParseNumber(next_arg(i)); + } else if (arg == "-l" || arg == "--prompt_length") { + opts.num_prompt_tokens = ParseNumber(next_arg(i)); + } else if (arg == "-g" || arg == "--generation_length") { + opts.num_tokens_to_generate = ParseNumber(next_arg(i)); + } else if (arg == "-r" || arg == "--repetitions") { + opts.num_iterations = ParseNumber(next_arg(i)); + } else if (arg == "-w" || arg == "--warmup") { + opts.num_warmup_iterations = ParseNumber(next_arg(i)); + } else if (arg == "-v" || arg == "--verbose") { + opts.verbose = true; + } else if (arg == "-h" || arg == "--help") { + PrintHelpAndExit(program_name, 0); + } else { + throw std::runtime_error(std::string{"Unknown option: "}.append(arg)); + } + } + + VerifyOptions(opts); + + return opts; + } catch (const std::exception& e) { + std::cerr << "Error: " << e.what() << "\n"; + PrintHelpAndExit(program_name, 1); + } +} + +} // namespace benchmark diff --git a/benchmark/c/options.h b/benchmark/c/options.h new file mode 100644 index 000000000..a00d19191 --- /dev/null +++ b/benchmark/c/options.h @@ -0,0 +1,22 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#pragma once + +#include + +namespace benchmark { + +struct Options { + std::string model_path{}; + size_t num_prompt_tokens{16}; + size_t num_tokens_to_generate{128}; + size_t batch_size{1}; + size_t num_iterations{5}; + size_t num_warmup_iterations{1}; + bool verbose{false}; +}; + +Options ParseOptionsFromCommandLine(int argc, const char* const* argv); + +} // namespace benchmark diff --git a/benchmark/python/README b/benchmark/python/README index da1174309..67cac3ccb 100644 --- a/benchmark/python/README +++ b/benchmark/python/README @@ -2,7 +2,7 @@ This is an end-to-end benchmarking script for any GenAI-supported ONNX model. Prerequisites: -0) Install onnxruntime-genai and onnxruntime +0) Install pandas, onnxruntime-genai and onnxruntime 1) Use builder.py to build the desired ONNX model @@ -10,4 +10,4 @@ Prerequisites: Example call to benchmarking script -python benchmark_e2e.py -i {model folder} -b 1 -l 128 -g 256 -r 100 -w 10 -k 5 -o {output csv file name} \ No newline at end of file +python benchmark_e2e.py -i {model folder} -b 1 -l 128 -g 256 -r 100 -w 10 -k 5 -o {output csv file name} diff --git a/build.py b/build.py index 150ba7a54..5c5b8febb 100644 --- a/build.py +++ b/build.py @@ -20,6 +20,10 @@ def is_linux(): """Check if the current platform is Linux.""" return sys.platform.startswith("linux") +def is_mac(): + """Check if the current platform is MacOS""" + return sys.platform.startswith("darwin") + def platform(): """Get the current platform.""" @@ -110,7 +114,7 @@ def build( Args: skip_wheel: Whether to skip building the Python wheel. Defaults to False. 
""" - if not is_windows() and not is_linux(): + if not is_windows() and not is_linux() and not is_mac(): raise OSError(f"Unsupported platform {platform()}.") if cuda_home and not use_cuda: diff --git a/cmake/options.cmake b/cmake/options.cmake index 80f004215..ac40a6d1d 100644 --- a/cmake/options.cmake +++ b/cmake/options.cmake @@ -6,5 +6,6 @@ option(NO_TOKENIZER "Don't include the Tokenizer" OFF) option(ENABLE_PYTHON "Build the Python API." ON) option(ENABLE_TESTS "Enable tests" ON) option(TEST_PHI2 "Enable tests for Phi2" OFF) +option(ENABLE_MODEL_BENCHMARK "Build model benchmark program" ON) cmake_dependent_option(BUILD_WHEEL "Build the python wheel" ON "ENABLE_PYTHON" OFF) \ No newline at end of file diff --git a/examples/c/CMakeLists.txt b/examples/c/CMakeLists.txt index d44909286..9b33a3ed3 100644 --- a/examples/c/CMakeLists.txt +++ b/examples/c/CMakeLists.txt @@ -4,13 +4,24 @@ project(phi2) set(CMAKE_CXX_STANDARD 20) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++2a") +set(ORT_GENAI_LIB_DIR ${CMAKE_SOURCE_DIR}/lib) + +if(WIN32) + set(ONNXRUNTIME_GENAI_LIB "onnxruntime-genai.dll") + set(ONNXRUNTIME_GENAI_DEPENDENCY "*.dll") +elseif(APPLE) + set(ONNXRUNTIME_GENAI_LIB "libonnxruntime-genai.dylib") + set(ONNXRUNTIME_GENAI_DEPENDENCY "*.dylib") +else() + set(ONNXRUNTIME_GENAI_LIB "libonnxruntime-genai.so") + set(ONNXRUNTIME_GENAI_DEPENDENCY "*.so") +endif() + add_executable(phi2 ${CMAKE_SOURCE_DIR}/src/main.cpp) -add_library(onnxruntime-genai SHARED IMPORTED) -set_target_properties(onnxruntime-genai PROPERTIES - IMPORTED_LOCATION_RELEASE ${CMAKE_SOURCE_DIR}/lib/onnxruntime-genai.dll - IMPORTED_IMPLIB_RELEASE ${CMAKE_SOURCE_DIR}/lib/onnxruntime-genai.lib -) + +target_link_directories(phi2 PRIVATE ${ORT_GENAI_LIB_DIR}) +target_link_libraries(phi2 PRIVATE ${ONNXRUNTIME_GENAI_LIB}) target_include_directories(phi2 PRIVATE ${CMAKE_SOURCE_DIR}/include) target_link_libraries( @@ -18,5 +29,11 @@ target_link_libraries( PUBLIC onnxruntime-genai) -file(COPY "${CMAKE_CURRENT_SOURCE_DIR}/phi-2" DESTINATION "${CMAKE_CURRENT_BINARY_DIR}/Release") -file(COPY "${CMAKE_CURRENT_SOURCE_DIR}/lib/" DESTINATION "${CMAKE_CURRENT_BINARY_DIR}/Release") +file(GLOB ort_genai_libs "${CMAKE_SOURCE_DIR}/lib/${ONNXRUNTIME_GENAI_DEPENDENCY}") + +foreach(DLL_FILE ${ort_genai_libs}) + add_custom_command( + TARGET phi2 POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy_if_different ${DLL_FILE} $ + ) +endforeach() \ No newline at end of file diff --git a/examples/c/README.md b/examples/c/README.md index 8cd2168fd..0a45578cd 100644 --- a/examples/c/README.md +++ b/examples/c/README.md @@ -48,5 +48,5 @@ cmake --build . 
 ```bash
 cd build\\Release
 
-.\phi2.exe
+.\phi2.exe path_to_model
 ```
diff --git a/examples/c/src/main.cpp b/examples/c/src/main.cpp
index d9aeb68a8..e4be639f2 100644
--- a/examples/c/src/main.cpp
+++ b/examples/c/src/main.cpp
@@ -4,8 +4,8 @@
 
 // C++ API Example
 
-void CXX_API() {
-  auto model = OgaModel::Create("phi-2");
+void CXX_API(const char* model_path) {
+  auto model = OgaModel::Create(model_path);
 
   auto tokenizer = OgaTokenizer::Create(*model);
 
   const char* prompt = "def is_prime(num):";
@@ -34,9 +34,9 @@ void CheckResult(OgaResult* result) {
   }
 }
 
-void C_API() {
+void C_API(const char* model_path) {
   OgaModel* model;
-  OgaCreateModel("phi-2", &model);
+  OgaCreateModel(model_path, &model);
 
   OgaTokenizer* tokenizer;
   CheckResult(OgaCreateTokenizer(model, &tokenizer));
@@ -74,16 +74,26 @@ void C_API() {
   OgaDestroyModel(model);
 }
 
-int main() {
+static void print_usage(int /*argc*/, char** argv) {
+  std::cerr << "usage: " << argv[0] << " model_path" << std::endl;
+}
+
+int main(int argc, char** argv) {
+  if (argc != 2) {
+    print_usage(argc, argv);
+    return -1;
+  }
+
+  std::cout << "-------------" << std::endl;
   std::cout << "Hello, Phi-2!" << std::endl;
   std::cout << "-------------" << std::endl;
 
   std::cout << "C++ API" << std::endl;
-  CXX_API();
+  CXX_API(argv[1]);
 
   std::cout << "C API" << std::endl;
-  C_API();
+  C_API(argv[1]);
 
   return 0;
 }
\ No newline at end of file
diff --git a/examples/csharp/Genny/.gitignore b/examples/csharp/Genny/.gitignore
new file mode 100644
index 000000000..496192431
--- /dev/null
+++ b/examples/csharp/Genny/.gitignore
@@ -0,0 +1,346 @@
+## Ignore Visual Studio temporary files, build results, and
+## files generated by popular Visual Studio add-ons.
+##
+## Get latest from https://github.com/github/gitignore/blob/master/VisualStudio.gitignore
+
+# User-specific files
+*.suo
+*.user
+*.userosscache
+*.sln.docstates
+
+# User-specific files (MonoDevelop/Xamarin Studio)
+*.userprefs
+
+# Build results
+[Dd]ebug/
+[Dd]ebugPublic/
+[Rr]elease/
+[Rr]eleases/
+x64/
+x86/
+bld/
+[Bb]in/
+[Oo]bj/
+[Ll]og/
+
+# Visual Studio 2015/2017 cache/options directory
+.vs/
+# Uncomment if you have tasks that create the project's static files in wwwroot
+#wwwroot/
+
+# Visual Studio 2017 auto generated files
+Generated\ Files/
+
+# MSTest test Results
+[Tt]est[Rr]esult*/
+[Bb]uild[Ll]og.*
+
+# NUNIT
+*.VisualState.xml
+TestResult.xml
+
+# Build Results of an ATL Project
+[Dd]ebugPS/
+[Rr]eleasePS/
+dlldata.c
+
+# Benchmark Results
+BenchmarkDotNet.Artifacts/
+
+# .NET Core
+project.lock.json
+project.fragment.lock.json
+artifacts/
+**/Properties/launchSettings.json
+
+# StyleCop
+StyleCopReport.xml
+
+# Files built by Visual Studio
+*_i.c
+*_p.c
+*_i.h
+*.ilk
+*.obj
+*.iobj
+*.pch
+*.pdb
+*.ipdb
+*.pgc
+*.pgd
+*.rsp
+*.sbr
+*.tlb
+*.tli
+*.tlh
+*.tmp
+*.tmp_proj
+*.log
+*.vspscc
+*.vssscc
+.builds
+*.pidb
+*.svclog
+*.scc
+
+# Chutzpah Test files
+_Chutzpah*
+
+# Visual C++ cache files
+ipch/
+*.aps
+*.ncb
+*.opendb
+*.opensdf
+*.sdf
+*.cachefile
+*.VC.db
+*.VC.VC.opendb
+
+# Visual Studio profiler
+*.psess
+*.vsp
+*.vspx
+*.sap
+
+# Visual Studio Trace Files
+*.e2e
+
+# TFS 2012 Local Workspace
+$tf/
+
+# Guidance Automation Toolkit
+*.gpState
+
+# ReSharper is a .NET coding add-in
+_ReSharper*/
+*.[Rr]e[Ss]harper
+*.DotSettings.user
+
+# JustCode is a .NET coding add-in
+.JustCode
+
+# TeamCity is a build add-in
+_TeamCity*
+
+# DotCover is a Code Coverage Tool
+*.dotCover
+
+# AxoCover is a Code Coverage Tool
+.axoCover/*
+!.axoCover/settings.json
+
+# Visual Studio code coverage results
+*.coverage
+*.coveragexml
+
+# NCrunch
+_NCrunch_*
+.*crunch*.local.xml
+nCrunchTemp_*
+
+# MightyMoose
+*.mm.*
+AutoTest.Net/
+
+# Web workbench (sass)
+.sass-cache/
+
+# Installshield output folder
+[Ee]xpress/
+
+# DocProject is a documentation generator add-in
+DocProject/buildhelp/
+DocProject/Help/*.HxT
+DocProject/Help/*.HxC
+DocProject/Help/*.hhc
+DocProject/Help/*.hhk
+DocProject/Help/*.hhp
+DocProject/Help/Html2
+DocProject/Help/html
+
+# Click-Once directory
+publish/
+
+# Publish Web Output
+*.[Pp]ublish.xml
+*.azurePubxml
+# Note: Comment the next line if you want to checkin your web deploy settings,
+# but database connection strings (with potential passwords) will be unencrypted
+*.pubxml
+*.publishproj
+
+# Microsoft Azure Web App publish settings. Comment the next line if you want to
+# checkin your Azure Web App publish settings, but sensitive information contained
+# in these scripts will be unencrypted
+PublishScripts/
+
+# NuGet Packages
+*.nupkg
+# The packages folder can be ignored because of Package Restore
+**/[Pp]ackages/*
+# except build/, which is used as an MSBuild target.
+!**/[Pp]ackages/build/
+# Uncomment if necessary however generally it will be regenerated when needed
+#!**/[Pp]ackages/repositories.config
+# NuGet v3's project.json files produces more ignorable files
+*.nuget.props
+*.nuget.targets
+
+# Microsoft Azure Build Output
+csx/
+*.build.csdef
+
+# Microsoft Azure Emulator
+ecf/
+rcf/
+
+# Windows Store app package directories and files
+AppPackages/
+BundleArtifacts/
+Package.StoreAssociation.xml
+_pkginfo.txt
+*.appx
+
+# Visual Studio cache files
+# files ending in .cache can be ignored
+*.[Cc]ache
+# but keep track of directories ending in .cache
+!*.[Cc]ache/
+
+# Others
+ClientBin/
+~$*
+*~
+*.dbmdl
+*.dbproj.schemaview
+*.jfm
+*.pfx
+*.publishsettings
+orleans.codegen.cs
+
+# Including strong name files can present a security risk
+# (https://github.com/github/gitignore/pull/2483#issue-259490424)
+#*.snk
+
+# Since there are multiple workflows, uncomment next line to ignore bower_components
+# (https://github.com/github/gitignore/pull/1529#issuecomment-104372622)
+#bower_components/
+
+# RIA/Silverlight projects
+Generated_Code/
+
+# Backup & report files from converting an old project file
+# to a newer Visual Studio version. Backup files are not needed,
+# because we have git ;-)
+_UpgradeReport_Files/
+Backup*/
+UpgradeLog*.XML
+UpgradeLog*.htm
+ServiceFabricBackup/
+*.rptproj.bak
+
+# SQL Server files
+*.mdf
+*.ldf
+*.ndf
+
+# Business Intelligence projects
+*.rdl.data
+*.bim.layout
+*.bim_*.settings
+*.rptproj.rsuser
+
+# Microsoft Fakes
+FakesAssemblies/
+
+# GhostDoc plugin setting file
+*.GhostDoc.xml
+
+# Node.js Tools for Visual Studio
+.ntvs_analysis.dat
+node_modules/
+
+# Visual Studio 6 build log
+*.plg
+
+# Visual Studio 6 workspace options file
+*.opt
+
+# Visual Studio 6 auto-generated workspace file (contains which files were open etc.)
+*.vbw
+
+# Visual Studio LightSwitch build output
+**/*.HTMLClient/GeneratedArtifacts
+**/*.DesktopClient/GeneratedArtifacts
+**/*.DesktopClient/ModelManifest.xml
+**/*.Server/GeneratedArtifacts
+**/*.Server/ModelManifest.xml
+_Pvt_Extensions
+
+# Paket dependency manager
+.paket/paket.exe
+paket-files/
+
+# FAKE - F# Make
+.fake/
+
+# JetBrains Rider
+.idea/
+*.sln.iml
+
+# CodeRush
+.cr/
+
+# Python Tools for Visual Studio (PTVS)
+__pycache__/
+*.pyc
+
+# Cake - Uncomment if you are using it
+# tools/**
+# !tools/packages.config
+
+# Tabs Studio
+*.tss
+
+# Telerik's JustMock configuration file
+*.jmconfig
+
+# BizTalk build output
+*.btp.cs
+*.btm.cs
+*.odx.cs
+*.xsd.cs
+
+# OpenCover UI analysis results
+OpenCover/
+
+# Azure Stream Analytics local run output
+ASALocalRun/
+
+# MSBuild Binary and Structured Log
+*.binlog
+
+# NVidia Nsight GPU debugger configuration file
+*.nvuser
+
+# MFractors (Xamarin productivity tool) working folder
+.mfractor/
+/docs/build
+src/TensorFlowNET.Native/bazel-*
+src/TensorFlowNET.Native/c_api.h
+/.vscode
+test/TensorFlowNET.Examples/mnist
+
+
+# training model resources
+.resources
+/redist
+*.xml
+*.xsd
+
+# docs
+site/
+
+docker-test-output/*
diff --git a/examples/csharp/Genny/Assets/Screenshot1.PNG b/examples/csharp/Genny/Assets/Screenshot1.PNG
new file mode 100644
index 000000000..59ef9f19a
Binary files /dev/null and b/examples/csharp/Genny/Assets/Screenshot1.PNG differ
diff --git a/examples/csharp/Genny/Assets/Screenshot2.PNG b/examples/csharp/Genny/Assets/Screenshot2.PNG
new file mode 100644
index 000000000..d1c635481
Binary files /dev/null and b/examples/csharp/Genny/Assets/Screenshot2.PNG differ
diff --git a/examples/csharp/Genny/Genny.sln b/examples/csharp/Genny/Genny.sln
new file mode 100644
index 000000000..3a30e258e
--- /dev/null
+++ b/examples/csharp/Genny/Genny.sln
@@ -0,0 +1,31 @@
+
+Microsoft Visual Studio Solution File, Format Version 12.00
+# Visual Studio Version 17
+VisualStudioVersion = 17.9.34622.214
+MinimumVisualStudioVersion = 10.0.40219.1
+Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Genny", "Genny\Genny.csproj", "{831197BD-63C7-4C0F-AD0E-4F6783CBB5C0}"
+EndProject
+Global
+	GlobalSection(SolutionConfigurationPlatforms) = preSolution
+		Debug_Cuda|x64 = Debug_Cuda|x64
+		Debug|x64 = Debug|x64
+		Release_Cuda|x64 = Release_Cuda|x64
+		Release|x64 = Release|x64
+	EndGlobalSection
+	GlobalSection(ProjectConfigurationPlatforms) = postSolution
+		{831197BD-63C7-4C0F-AD0E-4F6783CBB5C0}.Debug_Cuda|x64.ActiveCfg = Debug_Cuda|x64
+		{831197BD-63C7-4C0F-AD0E-4F6783CBB5C0}.Debug_Cuda|x64.Build.0 = Debug_Cuda|x64
+		{831197BD-63C7-4C0F-AD0E-4F6783CBB5C0}.Debug|x64.ActiveCfg = Debug|x64
+		{831197BD-63C7-4C0F-AD0E-4F6783CBB5C0}.Debug|x64.Build.0 = Debug|x64
+		{831197BD-63C7-4C0F-AD0E-4F6783CBB5C0}.Release_Cuda|x64.ActiveCfg = Release_Cuda|x64
+		{831197BD-63C7-4C0F-AD0E-4F6783CBB5C0}.Release_Cuda|x64.Build.0 = Release_Cuda|x64
+		{831197BD-63C7-4C0F-AD0E-4F6783CBB5C0}.Release|x64.ActiveCfg = Release|x64
+		{831197BD-63C7-4C0F-AD0E-4F6783CBB5C0}.Release|x64.Build.0 = Release|x64
+	EndGlobalSection
+	GlobalSection(SolutionProperties) = preSolution
+		HideSolutionNode = FALSE
+	EndGlobalSection
+	GlobalSection(ExtensibilityGlobals) = postSolution
+		SolutionGuid = {A7159277-CA72-45A9-8327-E3BF29214643}
+	EndGlobalSection
+EndGlobal
diff --git a/examples/csharp/Genny/Genny/App.xaml b/examples/csharp/Genny/Genny/App.xaml
new file mode 100644
index 000000000..ec5ea8fd1
--- /dev/null
+++ b/examples/csharp/Genny/Genny/App.xaml
@@ -0,0 +1,10 @@
+<Application x:Class="Genny.App"
+             xmlns="http://schemas.microsoft.com/winfx/2006/xaml/presentation"
+             xmlns:x="http://schemas.microsoft.com/winfx/2006/xaml"
+             xmlns:local="clr-namespace:Genny"
+             StartupUri="MainWindow.xaml">
+    <Application.Resources>
+
+    </Application.Resources>
+</Application>
+
diff --git a/examples/csharp/Genny/Genny/App.xaml.cs b/examples/csharp/Genny/Genny/App.xaml.cs
new file mode 100644
index 000000000..b6e61e540
--- /dev/null
+++ b/examples/csharp/Genny/Genny/App.xaml.cs
@@ -0,0 +1,11 @@
+using System.Windows;
+
+namespace Genny
+{
+    /// <summary>
+    /// Interaction logic for App.xaml
+    /// </summary>
+    public partial class App : Application
+    {
+    }
+}
diff --git a/examples/csharp/Genny/Genny/AssemblyInfo.cs b/examples/csharp/Genny/Genny/AssemblyInfo.cs
new file mode 100644
index 000000000..b0ec82757
--- /dev/null
+++ b/examples/csharp/Genny/Genny/AssemblyInfo.cs
@@ -0,0 +1,10 @@
+using System.Windows;
+
+[assembly: ThemeInfo(
+    ResourceDictionaryLocation.None,           //where theme specific resource dictionaries are located
+                                               //(used if a resource is not found in the page,
+                                               // or application resource dictionaries)
+    ResourceDictionaryLocation.SourceAssembly  //where the generic resource dictionary is located
+                                               //(used if a resource is not found in the page,
+                                               // app, or any theme specific resource dictionaries)
+)]
diff --git a/examples/csharp/Genny/Genny/Controls/SearchOptionsControl.xaml b/examples/csharp/Genny/Genny/Controls/SearchOptionsControl.xaml
new file mode 100644
index 000000000..2983243b5
--- /dev/null
+++ b/examples/csharp/Genny/Genny/Controls/SearchOptionsControl.xaml
@@ -0,0 +1,82 @@
diff --git a/examples/csharp/Genny/Genny/Controls/SearchOptionsControl.xaml.cs b/examples/csharp/Genny/Genny/Controls/SearchOptionsControl.xaml.cs
new file mode 100644
index 000000000..6386a43de
--- /dev/null
+++ b/examples/csharp/Genny/Genny/Controls/SearchOptionsControl.xaml.cs
@@ -0,0 +1,30 @@
+using Genny.ViewModel;
+using System.Windows;
+using System.Windows.Controls;
+
+namespace Genny.Controls
+{
+    /// <summary>
+    /// Interaction logic for SearchOptionsControl.xaml
+    /// </summary>
+    public partial class SearchOptionsControl : UserControl
+    {
+        public SearchOptionsControl()
+        {
+            InitializeComponent();
+        }
+
+        public static readonly DependencyProperty SearchOptionsProperty =
+            DependencyProperty.Register(nameof(SearchOptions), typeof(SearchOptionsModel), typeof(SearchOptionsControl), new PropertyMetadata(new SearchOptionsModel()));
+
+
+        /// <summary>
+        /// Gets or sets the search options.
+        /// </summary>
+        public SearchOptionsModel SearchOptions
+        {
+            get { return (SearchOptionsModel)GetValue(SearchOptionsProperty); }
+            set { SetValue(SearchOptionsProperty, value); }
+        }
+    }
+}
diff --git a/examples/csharp/Genny/Genny/Extensions.cs b/examples/csharp/Genny/Genny/Extensions.cs
new file mode 100644
index 000000000..5074df1e2
--- /dev/null
+++ b/examples/csharp/Genny/Genny/Extensions.cs
@@ -0,0 +1,50 @@
+using Genny.ViewModel;
+using Microsoft.ML.OnnxRuntimeGenAI;
+using System.Threading;
+using System.Threading.Tasks;
+using System.Windows;
+
+namespace Genny
+{
+    internal static class Extensions
+    {
+
+        /// <summary>
+        /// Applies the search options to the generator parameters.
+        /// </summary>
+        /// <param name="generatorParams">The generator parameters.</param>
+        /// <param name="searchOptions">The search options.</param>
+        internal static void ApplySearchOptions(this GeneratorParams generatorParams, SearchOptionsModel searchOptions)
+        {
+            generatorParams.SetSearchOption("top_p", searchOptions.TopP);
+            generatorParams.SetSearchOption("top_k", searchOptions.TopK);
+            generatorParams.SetSearchOption("temperature", searchOptions.Temperature);
+            generatorParams.SetSearchOption("repetition_penalty", searchOptions.RepetitionPenalty);
+            generatorParams.SetSearchOption("past_present_share_buffer", searchOptions.PastPresentShareBuffer);
+            generatorParams.SetSearchOption("num_return_sequences", searchOptions.NumReturnSequences);
+            generatorParams.SetSearchOption("no_repeat_ngram_size", searchOptions.NoRepeatNgramSize);
+            generatorParams.SetSearchOption("min_length", searchOptions.MinLength);
+            generatorParams.SetSearchOption("max_length", searchOptions.MaxLength);
+            generatorParams.SetSearchOption("length_penalty", searchOptions.LengthPenalty);
+            generatorParams.SetSearchOption("early_stopping", searchOptions.EarlyStopping);
+            generatorParams.SetSearchOption("do_sample", searchOptions.DoSample);
+            generatorParams.SetSearchOption("diversity_penalty", searchOptions.DiversityPenalty);
+        }
+
+        internal static Task<Sequences> EncodeAsync(this Tokenizer tokenizer, string input, CancellationToken cancellationToken = default)
+        {
+            return Application.Current.Dispatcher.Invoke(() =>
+            {
+                return Task.Run(() => tokenizer.Encode(input), cancellationToken);
+            });
+        }
+
+        internal static Task<string> DecodeAsync(this Tokenizer tokenizer, int[] input, CancellationToken cancellationToken = default)
+        {
+            return Application.Current.Dispatcher.Invoke(() =>
+            {
+                return Task.Run(() => tokenizer.Decode(input), cancellationToken);
+            });
+        }
+    }
+}
diff --git a/examples/csharp/Genny/Genny/Genny.csproj b/examples/csharp/Genny/Genny/Genny.csproj
new file mode 100644
index 000000000..d4928ad9f
--- /dev/null
+++ b/examples/csharp/Genny/Genny/Genny.csproj
@@ -0,0 +1,25 @@
+    WinExe
+    net6.0-windows
+    disable
+    disable
+    true
+    true
+    x64
+    x64
+    Debug;Release;Debug_Cuda;Release_Cuda;
diff --git a/examples/csharp/Genny/Genny/Images/robot.png b/examples/csharp/Genny/Genny/Images/robot.png
new file mode 100644
index 000000000..96edd0fb1
Binary files /dev/null and b/examples/csharp/Genny/Genny/Images/robot.png differ
diff --git a/examples/csharp/Genny/Genny/Images/user.png b/examples/csharp/Genny/Genny/Images/user.png
new file mode 100644
index 000000000..dcaf32f59
Binary files /dev/null and b/examples/csharp/Genny/Genny/Images/user.png differ
diff --git a/examples/csharp/Genny/Genny/MainWindow.xaml b/examples/csharp/Genny/Genny/MainWindow.xaml
new file mode 100644
index 000000000..3d721f96b
--- /dev/null
+++ b/examples/csharp/Genny/Genny/MainWindow.xaml
@@ -0,0 +1,72 @@