Skip to content

Commit

Permalink
Make the tokenizer built-in (#40)
Browse files Browse the repository at this point in the history
* Make the tokenizer built-in
* Remove USE_ORT_EXT build option.
  • Loading branch information
RyanUnderhill authored Jan 29, 2024
1 parent eedfb32 commit 59127f9
Show file tree
Hide file tree
Showing 26 changed files with 3,146 additions and 30 deletions.
72 changes: 50 additions & 22 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ include(cmake/external/onnxruntime_external_deps.cmake)

project (Generators LANGUAGES C CXX)
set(USE_CUDA 1) # "Build with CUDA support"
set(USE_ORT_EXT 0) # "Build with Onnxruntime Extensions tokenizer support"
set(USE_TOKENIZER 1) # "Build with Tokenizer support"

# Checking if CUDA is supported
include(CheckLanguage)
Expand Down Expand Up @@ -35,6 +35,7 @@ set(GENERATORS_ROOT ${PROJECT_SOURCE_DIR}/src)
set(MODELS_ROOT ${PROJECT_SOURCE_DIR}/src/models)
set(TESTS_ROOT ${PROJECT_SOURCE_DIR}/src/tests)
set(PYTHON_ROOT ${PROJECT_SOURCE_DIR}/src/python)
set(TOKENIZER_ROOT ${PROJECT_SOURCE_DIR}/src/tokenizer)

# CUDA Being enabled will make it not a debug build without this option, so all of the C++ headers will complain
# about a mismatch with the actual debug headers and it'll fail to link. I don't know why this happens, or if this is the best fix.
Expand All @@ -61,6 +62,12 @@ file(GLOB python_srcs CMAKE_CONFIGURE_DEPENDS
"${PYTHON_ROOT}/*.cpp"
)

file(GLOB tokenizer_srcs CONFIGURE_DEPENDS
"${TOKENIZER_ROOT}/*.cc"
"${TOKENIZER_ROOT}/utils/*.cc"
"${TOKENIZER_ROOT}/c_api/*.cc"
)

if(USE_CUDA AND CMAKE_CUDA_COMPILER)
# Don't let cmake set a default value for CMAKE_CUDA_ARCHITECTURES
cmake_policy(SET CMP0104 OLD)
Expand Down Expand Up @@ -90,6 +97,40 @@ endif()
add_library (onnxruntime-genai ${generator_srcs})
target_include_directories(onnxruntime-genai PRIVATE ${CMAKE_SOURCE_DIR}/ort/include)

if(USE_TOKENIZER)
  # Fetch and build sentencepiece (static library only) from a pinned release.
  FetchContent_Declare(spm URL https://github.com/google/sentencepiece/archive/refs/tags/v0.1.99.zip)
  set(SPM_USE_EXTERNAL_ABSL OFF CACHE BOOL "Use external absl")
  set(SPM_USE_BUILTIN_PROTOBUF ON CACHE BOOL "Use built-in protobuf")
  # Cache this like the SPM_* settings above: a plain variable can be shadowed
  # by sentencepiece's own option() under CMP0077 OLD behavior.
  set(SPM_ENABLE_SHARED OFF CACHE BOOL "Build sentencepiece shared library")

  FetchContent_GetProperties(spm)
  if(NOT spm_POPULATED)
    FetchContent_Populate(spm)
    # EXCLUDE_FROM_ALL keeps sentencepiece's auxiliary targets out of the default build.
    add_subdirectory(${spm_SOURCE_DIR} ${spm_BINARY_DIR} EXCLUDE_FROM_ALL)
  endif()

  # Header-only dependencies: Microsoft GSL and simdjson (single-header layout).
  FetchContent_Declare(GSL URL https://github.com/microsoft/GSL/archive/refs/tags/v4.0.0.zip)
  FetchContent_MakeAvailable(GSL)

  FetchContent_Declare(simdjson URL https://github.com/simdjson/simdjson/archive/refs/tags/v3.6.3.zip URL_HASH SHA1=2b063a2e81f74a5d1cb937fadf3d2fca0f1edb09)
  FetchContent_MakeAvailable(simdjson)

  add_library(tokenizer ${tokenizer_srcs})
  # PUBLIC include directories propagate to every target that links `tokenizer`.
  target_include_directories(tokenizer PUBLIC
    ${TOKENIZER_ROOT}
    ${GSL_SOURCE_DIR}/include
    ${simdjson_SOURCE_DIR}/singleheader/
    ${spm_SOURCE_DIR}/src
    ${spm_SOURCE_DIR}/src/builtin_pb
    ${spm_SOURCE_DIR}/third_party/absl
    ${spm_SOURCE_DIR}/third_party/protobuf-lite
  )
  # Suppress C++20 deprecation warnings raised by MSVC's standard library headers.
  target_compile_definitions(tokenizer PRIVATE _SILENCE_ALL_CXX20_DEPRECATION_WARNINGS)

  # target_link_libraries already establishes the build-order dependency on
  # sentencepiece-static, so no separate add_dependencies() call is needed.
  target_link_libraries(tokenizer PUBLIC sentencepiece-static simdjson)
endif()

if(CMAKE_SYSTEM_NAME STREQUAL "Linux")
set_target_properties(onnxruntime-genai PROPERTIES POSITION_INDEPENDENT_CODE ON)
endif()
Expand Down Expand Up @@ -148,27 +189,14 @@ foreach(DLL_FILE ${onnxruntime_libs})
)
endforeach()

if(USE_ORT_EXT)
add_compile_definitions(USE_ORT_EXT=1)
target_include_directories(onnxruntime-genai PRIVATE ${CMAKE_SOURCE_DIR}/ort-ext/include)
target_include_directories(Tests PRIVATE ${CMAKE_SOURCE_DIR}/ort-ext/include)
target_include_directories(python PRIVATE ${CMAKE_SOURCE_DIR}/ort-ext/include)
target_link_directories(Tests PRIVATE ${CMAKE_SOURCE_DIR}/ort-ext/lib)
target_link_directories(python PRIVATE ${CMAKE_SOURCE_DIR}/ort-ext/lib)
target_link_libraries(Tests PRIVATE ${ONNXRUNTIME_EXTENSIONS_LIB})
target_link_libraries(python PRIVATE ${ONNXRUNTIME_EXTENSIONS_LIB})

# Copy the onnxruntime-extensions binaries into the build folder so it's found on launch
file(GLOB onnxruntime_extensions_libs "${CMAKE_SOURCE_DIR}/ort-ext/lib/${ONNXRUNTIME_EXTENSIONS_FILES}")
foreach(DLL_FILE ${onnxruntime_extensions_libs})
add_custom_command(
TARGET onnxruntime-genai POST_BUILD
COMMAND ${CMAKE_COMMAND} -E copy
${DLL_FILE}
${CMAKE_BINARY_DIR}/$<CONFIG>/${DLL_FILE_NAME}
)
endforeach()
endif()
if(USE_TOKENIZER)
  # Scope the USE_TOKENIZER preprocessor guard to the targets that actually
  # compile tokenizer-dependent code, instead of polluting directory scope
  # with add_compile_definitions().
  target_compile_definitions(onnxruntime-genai PRIVATE USE_TOKENIZER=1)
  target_compile_definitions(Tests PRIVATE USE_TOKENIZER=1)
  target_compile_definitions(python PRIVATE USE_TOKENIZER=1)
  # Expose the tokenizer headers to each consumer.
  target_include_directories(onnxruntime-genai PRIVATE ${TOKENIZER_ROOT})
  target_include_directories(Tests PRIVATE ${TOKENIZER_ROOT})
  target_include_directories(python PRIVATE ${TOKENIZER_ROOT})
  target_link_libraries(Tests PRIVATE tokenizer)
  target_link_libraries(python PRIVATE tokenizer)
endif()

# Have visual studio put all files into one single folder vs the default split of header files into a separate folder
source_group(TREE ${GENERATORS_ROOT} FILES ${generator_srcs})
Expand Down
4 changes: 2 additions & 2 deletions src/models/model.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ void State::ClearIO() {
outputs_.clear();
}

#if USE_ORT_EXT
#if USE_TOKENIZER
void CheckResult(tfmError_t error) {
if (error != kTfmOK)
throw std::runtime_error(TfmGetLastErrorMessage());
Expand Down Expand Up @@ -115,7 +115,7 @@ std::vector<int32_t> Model::Generate(const SearchParams& params) {
return v;
}

#if USE_ORT_EXT
#if USE_TOKENIZER
// Constructs a new Tokenizer bound to this model's configuration (config_).
// Only compiled when USE_TOKENIZER is enabled (see surrounding #if guard).
std::unique_ptr<Tokenizer> Model::CreateTokenizer() {
  return std::make_unique<Tokenizer>(*config_);
}
Expand Down
6 changes: 3 additions & 3 deletions src/models/model.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#pragma once
#if USE_ORT_EXT
#if USE_TOKENIZER
#include "tfmtok_c.h"
#endif

Expand Down Expand Up @@ -36,7 +36,7 @@ struct TfmPtr {
T* p_{};
};

#if USE_ORT_EXT
#if USE_TOKENIZER
struct Tokenizer {
Tokenizer(Config& config);

Expand All @@ -52,7 +52,7 @@ struct Model {
virtual ~Model();

std::vector<int32_t> Generate(const SearchParams& params);
#if USE_ORT_EXT
#if USE_TOKENIZER
std::unique_ptr<Tokenizer> CreateTokenizer();
#endif

Expand Down
4 changes: 2 additions & 2 deletions src/python/python.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -291,7 +291,7 @@ PYBIND11_MODULE(onnxruntime_genai, m) {
m.def("print", &TestFP32, "Test float32");
m.def("print", &TestFP16, "Test float16");

#if USE_ORT_EXT
#if USE_TOKENIZER
pybind11::class_<Tokenizer>(m, "Tokenizer")
.def("encode", &Tokenizer::Encode)
.def("decode", [](const Tokenizer& t, pybind11::array_t<int32_t> tokens) { return t.Decode(ToSpan(tokens)); });
Expand All @@ -304,7 +304,7 @@ PYBIND11_MODULE(onnxruntime_genai, m) {
}),
"str"_a, "device_type"_a = DeviceType::Auto)
.def("Generate", [](Model& model, PySearchParams& search_params) { search_params.Prepare(); return model.Generate(search_params); })
#if USE_ORT_EXT
#if USE_TOKENIZER
.def("CreateTokenizer", [](Model& model) { return model.CreateTokenizer(); })
#endif
.def("CreateState", [](Model& model, PyRoamingArray<int32_t>& sequence_lengths, const PySearchParams& search_params) { return new PyState(model, sequence_lengths, search_params); })
Expand Down
2 changes: 1 addition & 1 deletion src/tests/tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -238,7 +238,7 @@ void Test_BeamSearch_Gpt_Cuda() {
void Test_Phi2_Cuda() {
#if TEST_PHI2
std::cout << "Testing_Phi2\r\n";
#if USE_ORT_EXT
#if USE_TOKENIZER

auto prompt = R"(
def print_prime(n):
Expand Down
Loading

0 comments on commit 59127f9

Please sign in to comment.