From 2f619f904ca446427f914bef4540545645f66ed5 Mon Sep 17 00:00:00 2001
From: Ryan Hill <38674843+RyanUnderhill@users.noreply.github.com>
Date: Mon, 29 Apr 2024 15:19:40 -0700
Subject: [PATCH] Remove NO_TOKENIZER build option (#353)

We always build with tokenizer now, and the option was broken anyways as it
wasn't being updated.
---
 CMakeLists.txt       | 15 +++++----------
 cmake/options.cmake  |  1 -
 src/models/model.cpp | 22 ----------------------
 src/models/model.h   | 17 -----------------
 test/CMakeLists.txt  |  9 +++------
 test/model_tests.cpp |  8 --------
 6 files changed, 8 insertions(+), 64 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index acf8f22f6..7aa8021a1 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -179,16 +179,11 @@ if(USE_DML)
   add_dependencies(onnxruntime-genai-static RESTORE_PACKAGES)
 endif()
 
-if(NO_TOKENIZER)
-  add_compile_definitions(NO_TOKENIZER=1)
-  message("----------------Tokenizer Disabled------------------")
-else()
-  add_subdirectory("${CMAKE_SOURCE_DIR}/src/tokenizer")
-  target_include_directories(onnxruntime-genai PRIVATE ${TOKENIZER_ROOT})
-  target_include_directories(onnxruntime-genai-static PUBLIC ${TOKENIZER_ROOT})
-  target_link_libraries(onnxruntime-genai PRIVATE tokenizer)
-  target_link_libraries(onnxruntime-genai-static PUBLIC tokenizer)
-endif()
+add_subdirectory("${CMAKE_SOURCE_DIR}/src/tokenizer")
+target_include_directories(onnxruntime-genai PRIVATE ${TOKENIZER_ROOT})
+target_include_directories(onnxruntime-genai-static PUBLIC ${TOKENIZER_ROOT})
+target_link_libraries(onnxruntime-genai PRIVATE tokenizer)
+target_link_libraries(onnxruntime-genai-static PUBLIC tokenizer)
 
 if(ENABLE_TESTS)
   add_subdirectory("${CMAKE_SOURCE_DIR}/test")
diff --git a/cmake/options.cmake b/cmake/options.cmake
index ac40a6d1d..688633fda 100644
--- a/cmake/options.cmake
+++ b/cmake/options.cmake
@@ -2,7 +2,6 @@ include(CMakeDependentOption)
 
 option(USE_CUDA "Build with CUDA support" ON)
 option(USE_DML "Build with DML support" OFF)
-option(NO_TOKENIZER "Don't include the Tokenizer" OFF)
 option(ENABLE_PYTHON "Build the Python API." ON)
 option(ENABLE_TESTS "Enable tests" ON)
 option(TEST_PHI2 "Enable tests for Phi2" OFF)
diff --git a/src/models/model.cpp b/src/models/model.cpp
index 4e7aa7343..439ab5c6a 100644
--- a/src/models/model.cpp
+++ b/src/models/model.cpp
@@ -94,26 +94,6 @@ std::vector<int32_t> PadInputs(std::span<std::span<const int32_t>> sequences, in
   return result;
 }
 
-#ifdef NO_TOKENIZER
-const std::string& TokenizerStream::Decode(int32_t token) {
-  throw std::runtime_error("Tokenizer not enabled");
-}
-
-std::unique_ptr<TokenizerStream> Tokenizer::CreateStream() const {
-  return std::make_unique<TokenizerStream>();
-}
-
-Tokenizer::Tokenizer(Config& config) {
-}
-
-std::vector<int32_t> Tokenizer::Encode(const char* text) const {
-  throw std::runtime_error("Tokenizer not enabled");
-}
-
-std::string Tokenizer::Decode(std::span<int32_t> tokens) const {
-  throw std::runtime_error("Tokenizer not enabled");
-}
-#else
 void CheckResult(tfmError_t error) {
   if (error != kTfmOK)
     throw std::runtime_error(TfmGetLastErrorMessage());
@@ -179,8 +159,6 @@ std::vector<std::string> Tokenizer::DecodeBatch(std::span<const std::vector<int32_t>> sequenc
   return strings;
 }
 
-#endif
-
 #if USE_CUDA
 // Since Python/Others can and will hold onto a generator object past the model object's lifetime we need to ensure
 // the allocator used is not destroyed until last. This keeps the allocator around until exit, after all other memory
diff --git a/src/models/model.h b/src/models/model.h
index b569373f8..fe3b9d832 100644
--- a/src/models/model.h
+++ b/src/models/model.h
@@ -1,8 +1,5 @@
 #pragma once
-#ifndef NO_TOKENIZER
 #include "tfmtok_c.h"
-#endif
-
 #include "captured_graph_pool.h"
 
 #if USE_DML
@@ -36,19 +33,6 @@ struct State {
   void ClearIO();  // Clear all inputs/outputs
 };
 
-#ifdef NO_TOKENIZER
-struct TokenizerStream {
-  const std::string& Decode(int32_t token);
-};
-
-struct Tokenizer {
-  Tokenizer(Config& config);
-
-  std::vector<int32_t> Encode(const char* text) const;
-  std::string Decode(std::span<int32_t> tokens) const;
-};
-#else
-
 template <typename T>
 struct TfmPtr {
   ~TfmPtr() { TfmDispose(&p_); }
@@ -94,7 +78,6 @@ struct Tokenizer : std::enable_shared_from_this<Tokenizer> {
  private:
   int32_t pad_token_id_;
 };
-#endif
 
 struct SessionInfo {
   SessionInfo(OrtSession& session);
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index daf8c40b3..80bb58fcd 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -43,12 +43,9 @@ if(USE_CUDA AND CMAKE_CUDA_COMPILER)
   target_sources(unit_tests PRIVATE ${cuda_test_srcs})
 endif()
 
-if(NO_TOKENIZER)
-  add_compile_definitions(NO_TOKENIZER=1)
-else()
-  target_include_directories(unit_tests PRIVATE ${TOKENIZER_ROOT})
-  target_link_libraries(unit_tests PRIVATE tokenizer)
-endif()
+target_include_directories(unit_tests PRIVATE ${TOKENIZER_ROOT})
+target_link_libraries(unit_tests PRIVATE tokenizer)
+
 set(TEST_MODEL_SRC_DIR "${CMAKE_CURRENT_SOURCE_DIR}/test_models/")
 set(TEST_MODEL_DES_DIR "$<TARGET_FILE_DIR:unit_tests>/test_models/")
 add_custom_command(TARGET unit_tests POST_BUILD
diff --git a/test/model_tests.cpp b/test/model_tests.cpp
index 66ceaee83..73c6464e0 100644
--- a/test/model_tests.cpp
+++ b/test/model_tests.cpp
@@ -203,7 +203,6 @@ TEST(ModelTests, BeamSearchGptCuda) {
 
 TEST(ModelTests, TestApiCuda) {
 #if TEST_PHI2
-#ifndef NO_TOKENIZER
   auto prompt = R"(
 
 def print_prime(n):
@@ -234,15 +233,11 @@ Print all primes between 1 and n
   auto result = generator->GetSequence(0);
   std::cout << tokenizer->Decode(result.GetCPU()) << "\r\n";
-#else
-  std::cout << "Test skipped - not built with onnxruntime extensions\r\n";
-#endif
 #endif
 }
 
 TEST(ModelTests, TestHighLevelApiCuda) {
 #if TEST_PHI2
-#ifndef NO_TOKENIZER
   auto prompt = R"(
 
 def print_prime(n):
 '''
@@ -266,9 +261,6 @@ Print all primes between 1 and n
 
   auto result = Generators::Generate(*model, *params);
 
   std::cout << tokenizer->Decode(result[0]) << "\r\n";
-#else
-  std::cout << "Test skipped - not built with onnxruntime extensions\r\n";
-#endif
 #endif
 }