From 2f619f904ca446427f914bef4540545645f66ed5 Mon Sep 17 00:00:00 2001
From: Ryan Hill <38674843+RyanUnderhill@users.noreply.github.com>
Date: Mon, 29 Apr 2024 15:19:40 -0700
Subject: [PATCH] Remove NO_TOKENIZER build option (#353)

We always build with tokenizer now, and the option was broken anyways as it
wasn't being updated.
---
 CMakeLists.txt       | 15 +++++----------
 cmake/options.cmake  |  1 -
 src/models/model.cpp | 22 ----------------------
 src/models/model.h   | 17 -----------------
 test/CMakeLists.txt  |  9 +++------
 test/model_tests.cpp |  8 --------
 6 files changed, 8 insertions(+), 64 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index acf8f22f6..7aa8021a1 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -179,16 +179,11 @@ if(USE_DML)
   add_dependencies(onnxruntime-genai-static RESTORE_PACKAGES)
 endif()
 
-if(NO_TOKENIZER)
-  add_compile_definitions(NO_TOKENIZER=1)
-  message("----------------Tokenizer Disabled------------------")
-else()
-  add_subdirectory("${CMAKE_SOURCE_DIR}/src/tokenizer")
-  target_include_directories(onnxruntime-genai PRIVATE ${TOKENIZER_ROOT})
-  target_include_directories(onnxruntime-genai-static PUBLIC ${TOKENIZER_ROOT})
-  target_link_libraries(onnxruntime-genai PRIVATE tokenizer)
-  target_link_libraries(onnxruntime-genai-static PUBLIC tokenizer)
-endif()
+add_subdirectory("${CMAKE_SOURCE_DIR}/src/tokenizer")
+target_include_directories(onnxruntime-genai PRIVATE ${TOKENIZER_ROOT})
+target_include_directories(onnxruntime-genai-static PUBLIC ${TOKENIZER_ROOT})
+target_link_libraries(onnxruntime-genai PRIVATE tokenizer)
+target_link_libraries(onnxruntime-genai-static PUBLIC tokenizer)
 
 if(ENABLE_TESTS)
   add_subdirectory("${CMAKE_SOURCE_DIR}/test")
diff --git a/cmake/options.cmake b/cmake/options.cmake
index ac40a6d1d..688633fda 100644
--- a/cmake/options.cmake
+++ b/cmake/options.cmake
@@ -2,7 +2,6 @@ include(CMakeDependentOption)
 
 option(USE_CUDA "Build with CUDA support" ON)
 option(USE_DML "Build with DML support" OFF)
-option(NO_TOKENIZER "Don't include the Tokenizer" OFF)
 option(ENABLE_PYTHON "Build the Python API." ON)
 option(ENABLE_TESTS "Enable tests" ON)
 option(TEST_PHI2 "Enable tests for Phi2" OFF)
diff --git a/src/models/model.cpp b/src/models/model.cpp
index 4e7aa7343..439ab5c6a 100644
--- a/src/models/model.cpp
+++ b/src/models/model.cpp
@@ -94,26 +94,6 @@ std::vector<int32_t> PadInputs(std::span<std::span<const int32_t>> sequences, in
   return result;
 }
 
-#ifdef NO_TOKENIZER
-const std::string& TokenizerStream::Decode(int32_t token) {
-  throw std::runtime_error("Tokenizer not enabled");
-}
-
-std::unique_ptr<TokenizerStream> Tokenizer::CreateStream() const {
-  return std::make_unique<TokenizerStream>();
-}
-
-Tokenizer::Tokenizer(Config& config) {
-}
-
-std::vector<int32_t> Tokenizer::Encode(const char* text) const {
-  throw std::runtime_error("Tokenizer not enabled");
-}
-
-std::string Tokenizer::Decode(std::span<int32_t> tokens) const {
-  throw std::runtime_error("Tokenizer not enabled");
-}
-#else
 void CheckResult(tfmError_t error) {
   if (error != kTfmOK)
     throw std::runtime_error(TfmGetLastErrorMessage());
@@ -179,8 +159,6 @@ std::vector<std::string> Tokenizer::DecodeBatch(std::span<const std::vector<int32_t>> sequenc
   return strings;
 }
 
-#endif
-
 #if USE_CUDA
 // Since Python/Others can and will hold onto a generator object past the model object's lifetime we need to ensure
 // the allocator used is not destroyed until last. This keeps the allocator around until exit, after all other memory
diff --git a/src/models/model.h b/src/models/model.h
index b569373f8..fe3b9d832 100644
--- a/src/models/model.h
+++ b/src/models/model.h
@@ -1,8 +1,5 @@
 #pragma once
-#ifndef NO_TOKENIZER
 #include "tfmtok_c.h"
-#endif
-
 #include "captured_graph_pool.h"
 
 #if USE_DML
@@ -36,19 +33,6 @@ struct State {
   void ClearIO();  // Clear all inputs/outputs
 };
 
-#ifdef NO_TOKENIZER
-struct TokenizerStream {
-  const std::string& Decode(int32_t token);
-};
-
-struct Tokenizer {
-  Tokenizer(Config& config);
-
-  std::vector<int32_t> Encode(const char* text) const;
-  std::string Decode(std::span<int32_t> tokens) const;
-};
-#else
-
 template <typename T>
 struct TfmPtr {
   ~TfmPtr() { TfmDispose(&p_); }
@@ -94,7 +78,6 @@ struct Tokenizer : std::enable_shared_from_this<Tokenizer> {
  private:
   int32_t pad_token_id_;
 };
-#endif
 
 struct SessionInfo {
   SessionInfo(OrtSession& session);
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index daf8c40b3..80bb58fcd 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -43,12 +43,9 @@ if(USE_CUDA AND CMAKE_CUDA_COMPILER)
   target_sources(unit_tests PRIVATE ${cuda_test_srcs})
 endif()
 
-if(NO_TOKENIZER)
-  add_compile_definitions(NO_TOKENIZER=1)
-else()
-  target_include_directories(unit_tests PRIVATE ${TOKENIZER_ROOT})
-  target_link_libraries(unit_tests PRIVATE tokenizer)
-endif()
+target_include_directories(unit_tests PRIVATE ${TOKENIZER_ROOT})
+target_link_libraries(unit_tests PRIVATE tokenizer)
+
 set(TEST_MODEL_SRC_DIR "${CMAKE_CURRENT_SOURCE_DIR}/test_models/")
 set(TEST_MODEL_DES_DIR "$<TARGET_FILE_DIR:unit_tests>/test_models/")
 add_custom_command(TARGET unit_tests POST_BUILD
diff --git a/test/model_tests.cpp b/test/model_tests.cpp
index 66ceaee83..73c6464e0 100644
--- a/test/model_tests.cpp
+++ b/test/model_tests.cpp
@@ -203,7 +203,6 @@ TEST(ModelTests, BeamSearchGptCuda) {
 
 TEST(ModelTests, TestApiCuda) {
 #if TEST_PHI2
-#ifndef NO_TOKENIZER
   auto prompt = R"(
 
 def print_prime(n):
@@ -234,15 +233,11 @@ Print all primes between 1 and n
   auto result = generator->GetSequence(0);
   std::cout << tokenizer->Decode(result.GetCPU()) << "\r\n";
-#else
-  std::cout << "Test skipped - not built with onnxruntime extensions\r\n";
-#endif
 #endif
 }
 
 TEST(ModelTests, TestHighLevelApiCuda) {
 #if TEST_PHI2
-#ifndef NO_TOKENIZER
   auto prompt = R"(
 
 def print_prime(n):
 '''
@@ -266,9 +261,6 @@ Print all primes between 1 and n
 
   auto result = Generators::Generate(*model, *params);
 
   std::cout << tokenizer->Decode(result[0]) << "\r\n";
-#else
-  std::cout << "Test skipped - not built with onnxruntime extensions\r\n";
-#endif
 #endif
 }