From 98588f227457b6a1b8f40fdd376ecbde17fa0c3a Mon Sep 17 00:00:00 2001
From: zhihao
Date: Tue, 15 Oct 2024 13:54:40 +0000
Subject: [PATCH] Fix tokenizer.model path resolution and LLAMA BOS space
 handling

register_tokenizer() resolved tokenizer.json whether the registered path
was a directory or a file, but always appended tokenizer.model to the
folder; resolve both files the same way. Also record whether the
tokenizer was loaded from a legacy tokenizer.json in a new
old_llama_tokenizer flag, and re-insert the leading space after Decode()
for LLAMA only when that flag is set.
---
 include/flexflow/request_manager.h |  1 +
 src/runtime/request_manager.cc     | 27 ++++++++++++++++++---------
 2 files changed, 19 insertions(+), 9 deletions(-)

diff --git a/include/flexflow/request_manager.h b/include/flexflow/request_manager.h
index 36a56012fc..ca83acb4f4 100644
--- a/include/flexflow/request_manager.h
+++ b/include/flexflow/request_manager.h
@@ -302,6 +302,7 @@ class RequestManager {
   ModelType model_type;
   int bos_token_id;
   int eos_token_id;
+  bool old_llama_tokenizer = false;
   std::string output_filepath;
   std::queue<Request> pending_infr_request_queue;
   std::queue<Request> pending_peft_request_queue;
diff --git a/src/runtime/request_manager.cc b/src/runtime/request_manager.cc
index fcc936daa7..151ccee19b 100644
--- a/src/runtime/request_manager.cc
+++ b/src/runtime/request_manager.cc
@@ -189,23 +189,32 @@ void RequestManager::register_tokenizer(ModelType type,
     // try with tokenizer.json first
     std::filesystem::path tokenizer_json_path;
     if (std::filesystem::is_directory(tokenizer_folder)) {
-      tokenizer_json_path = std::filesystem::path(tokenizer_folder) / "tokenizer.json";
+      tokenizer_json_path =
+          std::filesystem::path(tokenizer_folder) / "tokenizer.json";
     } else {
       tokenizer_json_path = tokenizer_folder;
     }
     if (std::filesystem::exists(tokenizer_json_path)) {
+      old_llama_tokenizer = true;
       // load from tokenizer.json
-      this->tokenizer_ = Tokenizer::FromBlobJSON(LoadBytesFromFile(tokenizer_json_path.string()));
+      this->tokenizer_ = Tokenizer::FromBlobJSON(
+          LoadBytesFromFile(tokenizer_json_path.string()));
     } else {
       // load from tokenizer.model
-      std::filesystem::path tokenizer_model_path =
-          tokenizer_folder / "tokenizer.model";
+      std::filesystem::path tokenizer_model_path;
+      if (std::filesystem::is_directory(tokenizer_folder)) {
+        tokenizer_model_path =
+            std::filesystem::path(tokenizer_folder) / "tokenizer.model";
+      } else {
+        tokenizer_model_path = tokenizer_folder;
+      }
       if (!std::filesystem::exists(tokenizer_model_path)) {
         std::cerr << "Failed to open file: " << tokenizer_model_path
                   << std::endl;
         assert(false);
       }
-      this->tokenizer_ = Tokenizer::FromBlobSentencePiece(LoadBytesFromFile(tokenizer_model_path.string()));
+      this->tokenizer_ = Tokenizer::FromBlobSentencePiece(
+          LoadBytesFromFile(tokenizer_model_path.string()));
     }
   } else if (model_type == ModelType::OPT) {
     std::filesystem::path vocab_file = tokenizer_folder / "vocab.json";
@@ -658,7 +667,7 @@ BatchConfig RequestManager::prepare_next_batch(BatchConfig const &old_bc,
       std::string output = this->tokenizer_->Decode(request.tokens);
       // Unlike Huggingface, the sentencepiece C++ library automatically
       // removes the BOS token
-      if (model_type == ModelType::LLAMA &&
+      if (model_type == ModelType::LLAMA && old_llama_tokenizer &&
           request.tokens.at(0) == bos_token_id) {
         output = " " + output;
       }
@@ -1119,7 +1128,7 @@ BeamSearchBatchConfig
       std::string output = this->tokenizer_->Decode(request.tokens);
       // Unlike Huggingface, the sentencepiece C++ library automatically
       // removes the BOS token
-      if (model_type == ModelType::LLAMA &&
+      if (model_type == ModelType::LLAMA && old_llama_tokenizer &&
           request.tokens.at(0) == bos_token_id) {
         output = " " + output;
       }
@@ -1262,7 +1271,7 @@ BeamSearchBatchConfig
       std::string output = this->tokenizer_->Decode(request.tokens);
       // Unlike Huggingface, the sentencepiece C++ library automatically
       // removes the BOS token
-      if (model_type == ModelType::LLAMA &&
+      if (model_type == ModelType::LLAMA && old_llama_tokenizer &&
           request.tokens.at(0) == bos_token_id) {
         output = " " + output;
       }
@@ -1310,7 +1319,7 @@ BeamSearchBatchConfig
       std::string output = this->tokenizer_->Decode(request.tokens);
       // Unlike Huggingface, the sentencepiece C++ library automatically removes
       // the BOS token
-      if (model_type == ModelType::LLAMA &&
+      if (model_type == ModelType::LLAMA && old_llama_tokenizer &&
           request.tokens.at(0) == bos_token_id) {
         output = " " + output;
       }
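
For reviewers, below is a minimal standalone sketch of the two behaviors
this patch changes. The names resolve_tokenizer_file and
fixup_decoded_output are hypothetical stand-ins, not FlexFlow API; the
real logic lives in RequestManager::register_tokenizer and the four
Decode() call sites above.

#include <cassert>
#include <filesystem>
#include <iostream>
#include <string>
#include <vector>

namespace fs = std::filesystem;

enum class ModelType { LLAMA, OPT };

// The registered tokenizer path may be a directory containing the
// tokenizer file or the tokenizer file itself. Before the patch,
// tokenizer.model was unconditionally appended to the folder, which
// broke when a file path was registered directly.
fs::path resolve_tokenizer_file(fs::path const &tokenizer_folder,
                                std::string const &filename) {
  if (fs::is_directory(tokenizer_folder)) {
    return tokenizer_folder / filename;
  }
  return tokenizer_folder;
}

// The leading space is re-inserted only for LLAMA models whose tokenizer
// came from a legacy tokenizer.json (old_llama_tokenizer == true), since
// the sentencepiece C++ backend strips the BOS token on decode.
std::string fixup_decoded_output(std::string output,
                                 std::vector<int> const &tokens,
                                 ModelType model_type,
                                 bool old_llama_tokenizer,
                                 int bos_token_id) {
  if (model_type == ModelType::LLAMA && old_llama_tokenizer &&
      !tokens.empty() && tokens.at(0) == bos_token_id) {
    output = " " + output;
  }
  return output;
}

int main() {
  int const kBos = 1; // conventional LLAMA BOS id, assumed here
  std::vector<int> tokens = {kBos, 22172};

  // Legacy tokenizer.json: the stripped leading space is restored.
  assert(fixup_decoded_output("Hello", tokens, ModelType::LLAMA,
                              /*old_llama_tokenizer=*/true, kBos) ==
         " Hello");
  // Any other tokenizer: output passes through unchanged.
  assert(fixup_decoded_output("Hello", tokens, ModelType::LLAMA,
                              /*old_llama_tokenizer=*/false, kBos) ==
         "Hello");

  // A directory gets the filename appended; a path that is not a
  // directory resolves to itself.
  fs::path dir = fs::temp_directory_path() / "tokenizer_demo";
  fs::create_directories(dir);
  assert(resolve_tokenizer_file(dir, "tokenizer.model") ==
         dir / "tokenizer.model");
  assert(resolve_tokenizer_file(dir / "tokenizer.model",
                                "tokenizer.model") ==
         dir / "tokenizer.model");
  std::cout << "ok" << std::endl;
  return 0;
}

Gating the existing space fixup on old_llama_tokenizer, rather than
deleting it, keeps decoded output identical for models that still ship a
legacy tokenizer.json while sparing newer LLAMA tokenizers a spurious
leading space.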