From a4d2b3a4a48284f12ec6e9ef91614a6b7b8686c7 Mon Sep 17 00:00:00 2001 From: Fangjun Kuang Date: Wed, 6 Dec 2023 18:34:08 +0800 Subject: [PATCH] Convert input text to lowercase --- sherpa-onnx/csrc/offline-tts-character-frontend.cc | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/sherpa-onnx/csrc/offline-tts-character-frontend.cc b/sherpa-onnx/csrc/offline-tts-character-frontend.cc index befcb7bb8..cdbbe7a45 100644 --- a/sherpa-onnx/csrc/offline-tts-character-frontend.cc +++ b/sherpa-onnx/csrc/offline-tts-character-frontend.cc @@ -96,7 +96,7 @@ OfflineTtsCharacterFrontend::OfflineTtsCharacterFrontend( std::vector> OfflineTtsCharacterFrontend::ConvertTextToTokenIds( - const std::string &text, const std::string &voice /*= ""*/) const { + const std::string &_text, const std::string &voice /*= ""*/) const { // see // https://github.com/coqui-ai/TTS/blob/dev/TTS/tts/utils/text/tokenizer.py#L87 int32_t use_eos_bos = meta_data_.use_eos_bos; @@ -105,8 +105,10 @@ OfflineTtsCharacterFrontend::ConvertTextToTokenIds( int32_t blank_id = meta_data_.blank_id; int32_t add_blank = meta_data_.add_blank; - // Note: No need to convert text to lowercase since tokens.txt - // is assumed to contain both lowercase and uppercase tokens. + std::string text(_text.size(), 0); + std::transform(_text.begin(), _text.end(), text.begin(), + [](auto c) { return std::tolower(c); }); + std::wstring_convert, char32_t> conv; std::u32string s = conv.from_bytes(text);