From 6e56bf92e069fe6bf962376314523ec72708bfbb Mon Sep 17 00:00:00 2001 From: Viet Anh Nguyen Date: Sun, 30 Jul 2023 11:00:12 +0700 Subject: [PATCH 1/2] Clean up APIs --- customchar/audio/voice_recorder.cpp | 27 +++++++++++++++++++++++++++ customchar/audio/voice_recorder.h | 4 ++++ customchar/character/character.cpp | 27 ++++++--------------------- customchar/llm/llm.cpp | 26 ++++++++++++++++++-------- customchar/llm/llm.h | 2 +- 5 files changed, 56 insertions(+), 30 deletions(-) diff --git a/customchar/audio/voice_recorder.cpp b/customchar/audio/voice_recorder.cpp index 8a62257..3484dc3 100644 --- a/customchar/audio/voice_recorder.cpp +++ b/customchar/audio/voice_recorder.cpp @@ -32,3 +32,30 @@ void VoiceRecorder::GetAudio(std::vector& result) { audio_->Get(voice_ms, pcmf32_cur_); result = pcmf32_cur_; } + +std::vector VoiceRecorder::RecordSpeech() { + bool is_running; + std::vector audio_buff; + while (true) { + // Handle Ctrl + C + is_running = audio::SDLPollEvents(); + if (!is_running) { + break; + } + + // Delay + std::this_thread::sleep_for(std::chrono::milliseconds(100)); + + // Sample audio + SampleAudio(); + if (!FinishedTalking()) { + continue; + } + + // Get recorded audio + GetAudio(audio_buff); + break; + } + + return audio_buff; +}; diff --git a/customchar/audio/voice_recorder.h b/customchar/audio/voice_recorder.h index 9e4cb00..b2557b5 100644 --- a/customchar/audio/voice_recorder.h +++ b/customchar/audio/voice_recorder.h @@ -35,6 +35,10 @@ class VoiceRecorder { /// @brief Get final audio_ void GetAudio(std::vector& result); + + /// @brief Record speech from user + /// @return Audio buffer from user + std::vector RecordSpeech(); }; // class VoiceRecorder } // namespace audio diff --git a/customchar/character/character.cpp b/customchar/character/character.cpp index 26e98d6..f15c826 100644 --- a/customchar/character/character.cpp +++ b/customchar/character/character.cpp @@ -57,31 +57,20 @@ void Character::Run() { break; } - // Delay - std::this_thread::sleep_for(std::chrono::milliseconds(100)); - float prob = 0.0f; - int64_t t_ms = 0; - - // Sample audio - voice_recoder_->SampleAudio(); - if (!voice_recoder_->FinishedTalking()) { - continue; - } - - // Get recorded audio - std::vector audio_buff; - voice_recoder_->GetAudio(audio_buff); + // Record speech from user + std::vector audio_buff = voice_recoder_->RecordSpeech(); // Recognize speech + float prob; + int64_t t_ms; std::string text_heard = speech_recognizer_->Recognize(audio_buff, prob, t_ms); // Tokenize user input auto tokens = llm_->Tokenize(text_heard, false); - // Skip if nothing was heard + // Start over if nothing was heard if (text_heard.empty() || tokens.empty()) { - printf("Heard nothing, skipping ...\n"); voice_recoder_->ClearAudioBuffer(); continue; } @@ -103,12 +92,8 @@ void Character::Run() { // Otherwise, LLM will handle std::string response; if (!plugin_executor_->ParseAndExecute(text_heard, response)) { - // Append the new input tokens to the session_tokens vector - llm_->AddTokensToCurrentSession(tokens); - // Get answer from LLM - embd = llm_->Tokenize(formated_text_heard, false); // Get answer from LLM - response = llm_->GetAnswer(embd); + response = llm_->GetAnswer(text_heard); } else { // TODO: Add plugin executor response to LLM session } diff --git a/customchar/llm/llm.cpp b/customchar/llm/llm.cpp index 4a11e3d..a0ca150 100644 --- a/customchar/llm/llm.cpp +++ b/customchar/llm/llm.cpp @@ -181,11 +181,21 @@ void LLM::EvalModel() { n_matching_session_tokens < (embd_inp_.size() * 3 / 4); } -std::string LLM::GetAnswer(std::vector& embd) { +std::string LLM::GetAnswer(const std::string& user_input) { + // Tokenize and put unformated tokens to the session store + AddTokensToCurrentSession(Tokenize(user_input, false)); + + // Format the input and tokenize + // TODO: Do it more efficient (using above output) + std::string formated_input = user_input; + formated_input.insert(0, 1, ' '); + formated_input += "\n" + bot_name_ + chat_symb_; + std::vector embd = Tokenize(formated_input, false); + bool done = false; int last_length = 0; int loop_count = 0; - std::string text_to_speak; + std::string output_text; while (true) { if (embd.size() > 0) { if (n_past_ + (int)embd.size() > n_ctx_) { @@ -297,7 +307,7 @@ std::string LLM::GetAnswer(std::vector& embd) { if (id != llama_token_eos()) { // add it to the context embd.push_back(id); - text_to_speak += llama_token_to_str(ctx_llama_, id); + output_text += llama_token_to_str(ctx_llama_, id); printf("%s", llama_token_to_str(ctx_llama_, id)); } } @@ -314,7 +324,7 @@ std::string LLM::GetAnswer(std::vector& embd) { last_output.length() - antiprompt.length(), antiprompt.length()) != std::string::npos) { done = true; - text_to_speak = common::Replace(text_to_speak, antiprompt, ""); + output_text = common::Replace(output_text, antiprompt, ""); fflush(stdout); need_to_save_session_ = true; break; @@ -324,8 +334,8 @@ std::string LLM::GetAnswer(std::vector& embd) { // Break to avoid infinite loop // TODO: Fix this bug - if ((int)text_to_speak.length() == last_length + 1 && - text_to_speak[text_to_speak.length() - 1] == '\n') { + if ((int)output_text.length() == last_length + 1 && + output_text[output_text.length() - 1] == '\n') { ++loop_count; } else { loop_count = 0; @@ -333,8 +343,8 @@ std::string LLM::GetAnswer(std::vector& embd) { if (loop_count > 5) { break; } - last_length = text_to_speak.length(); + last_length = output_text.length(); } - return text_to_speak; + return output_text; } diff --git a/customchar/llm/llm.h b/customchar/llm/llm.h index 3e95b15..415781d 100644 --- a/customchar/llm/llm.h +++ b/customchar/llm/llm.h @@ -87,7 +87,7 @@ class LLM { std::vector Tokenize(const std::string& text, bool add_bos); /// @brief Get answer from LLM - std::string GetAnswer(std::vector& embd); + std::string GetAnswer(const std::string& user_input); }; } // namespace llm From 7077559bd89ed7b6055613aac2a848bd65b0fa1a Mon Sep 17 00:00:00 2001 From: Viet Anh Nguyen Date: Sun, 30 Jul 2023 11:06:57 +0700 Subject: [PATCH 2/2] Add instructions to install OpenCV --- README.md | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 695c274..6b0578b 100644 --- a/README.md +++ b/README.md @@ -67,22 +67,25 @@ Install dependencies: - [CMake](https://cmake.org/download/) - C++ 14 compiler -On macOS: +On macOS: Using [Homebrew](https://brew.sh/) ```bash brew install sdl2 glew glfw3 +brew install opencv ``` On Ubuntu: ```bash sudo apt-get install libsdl2-dev libglew-dev libglfw3-dev +sudo apt-get install libopencv-dev ``` -On Windows: +On Windows: Using [vcpkg](https://github.com/microsoft/vcpkg) and [Git Bash](https://git-scm.com/downloads): ```bash vcpkg install sdl2:x64-windows glew:x64-windows glfw3:x64-windows +vcpkg install opencv[contrib,nonfree,ffmpeg,ipp]:x64-windows --recurse ``` Build the **CustomChar** executable: