From 6e56bf92e069fe6bf962376314523ec72708bfbb Mon Sep 17 00:00:00 2001
From: Viet Anh Nguyen <vietanh.dev@gmail.com>
Date: Sun, 30 Jul 2023 11:00:12 +0700
Subject: [PATCH 1/2] Clean up APIs

---
 customchar/audio/voice_recorder.cpp | 27 +++++++++++++++++++++++++++
 customchar/audio/voice_recorder.h   |  4 ++++
 customchar/character/character.cpp  | 27 ++++++---------------------
 customchar/llm/llm.cpp              | 26 ++++++++++++++++++--------
 customchar/llm/llm.h                |  2 +-
 5 files changed, 56 insertions(+), 30 deletions(-)
diff --git a/customchar/audio/voice_recorder.cpp b/customchar/audio/voice_recorder.cpp
index 8a62257..3484dc3 100644
--- a/customchar/audio/voice_recorder.cpp
+++ b/customchar/audio/voice_recorder.cpp
@@ -32,3 +32,30 @@ void VoiceRecorder::GetAudio(std::vector<float>& result) {
   audio_->Get(voice_ms, pcmf32_cur_);
   result = pcmf32_cur_;
 }
+
+// Records speech from the user. Blocks, polling every 100 ms, until
+// FinishedTalking() reports true, then returns the recorded audio. If
+// SDLPollEvents() reports that the application should stop (e.g. on
+// Ctrl + C) before that, an empty buffer is returned instead.
+std::vector<float> VoiceRecorder::RecordSpeech() {
+  std::vector<float> audio_buff;
+
+  // Handle Ctrl + C: leave the loop as soon as polling reports a stop
+  while (audio::SDLPollEvents()) {
+    // Delay between two sampling rounds
+    std::this_thread::sleep_for(std::chrono::milliseconds(100));
+
+    // Sample audio and keep listening until the user has finished talking
+    SampleAudio();
+    if (!FinishedTalking()) {
+      continue;
+    }
+
+    // Get recorded audio
+    GetAudio(audio_buff);
+    break;
+  }
+
+  // Still empty here if the loop ended before GetAudio() was reached
+  return audio_buff;
+}
diff --git a/customchar/audio/voice_recorder.h b/customchar/audio/voice_recorder.h
index 9e4cb00..b2557b5 100644
--- a/customchar/audio/voice_recorder.h
+++ b/customchar/audio/voice_recorder.h
@@ -35,6 +35,10 @@ class VoiceRecorder {
 
   /// @brief Get final audio_
   void GetAudio(std::vector<float>& result);
+
+  /// @brief Record speech from user
+  /// @return Audio buffer from user
+  std::vector<float> RecordSpeech();
 };  // class VoiceRecorder
 
 }  // namespace audio
diff --git a/customchar/character/character.cpp b/customchar/character/character.cpp
index 26e98d6..f15c826 100644
--- a/customchar/character/character.cpp
+++ b/customchar/character/character.cpp
@@ -57,31 +57,20 @@ void Character::Run() {
       break;
     }
 
-    // Delay
-    std::this_thread::sleep_for(std::chrono::milliseconds(100));
-    float prob = 0.0f;
-    int64_t t_ms = 0;
-
-    // Sample audio
-    voice_recoder_->SampleAudio();
-    if (!voice_recoder_->FinishedTalking()) {
-      continue;
-    }
-
-    // Get recorded audio
-    std::vector<float> audio_buff;
-    voice_recoder_->GetAudio(audio_buff);
+    // Record speech from user
+    std::vector<float> audio_buff = voice_recoder_->RecordSpeech();
 
     // Recognize speech
+    float prob;
+    int64_t t_ms;
     std::string text_heard =
         speech_recognizer_->Recognize(audio_buff, prob, t_ms);
 
     // Tokenize user input
     auto tokens = llm_->Tokenize(text_heard, false);
 
-    // Skip if nothing was heard
+    // Start over if nothing was heard
     if (text_heard.empty() || tokens.empty()) {
-      printf("Heard nothing, skipping ...\n");
       voice_recoder_->ClearAudioBuffer();
       continue;
     }
@@ -103,12 +92,8 @@ void Character::Run() {
     // Otherwise, LLM will handle
     std::string response;
     if (!plugin_executor_->ParseAndExecute(text_heard, response)) {
-      // Append the new input tokens to the session_tokens vector
-      llm_->AddTokensToCurrentSession(tokens);
-      // Get answer from LLM
-      embd = llm_->Tokenize(formated_text_heard, false);
       // Get answer from LLM
-      response = llm_->GetAnswer(embd);
+      response = llm_->GetAnswer(text_heard);
     } else {
       // TODO: Add plugin executor response to LLM session
     }
diff --git a/customchar/llm/llm.cpp b/customchar/llm/llm.cpp
index 4a11e3d..a0ca150 100644
--- a/customchar/llm/llm.cpp
+++ b/customchar/llm/llm.cpp
@@ -181,11 +181,21 @@ void LLM::EvalModel() {
       n_matching_session_tokens < (embd_inp_.size() * 3 / 4);
 }
 
-std::string LLM::GetAnswer(std::vector<llama_token>& embd) {
+std::string LLM::GetAnswer(const std::string& user_input) {
+  // Tokenize the unformatted input and add its tokens to the session store
+  AddTokensToCurrentSession(Tokenize(user_input, false));
+
+  // Format the input and tokenize
+  // TODO: Do this more efficiently (reuse the tokens computed above)
+  std::string formated_input = user_input;
+  formated_input.insert(0, 1, ' ');
+  formated_input += "\n" + bot_name_ + chat_symb_;
+  std::vector<llama_token> embd = Tokenize(formated_input, false);
+
   bool done = false;
   int last_length = 0;
   int loop_count = 0;
-  std::string text_to_speak;
+  std::string output_text;
   while (true) {
     if (embd.size() > 0) {
       if (n_past_ + (int)embd.size() > n_ctx_) {
@@ -297,7 +307,7 @@ std::string LLM::GetAnswer(std::vector<llama_token>& embd) {
       if (id != llama_token_eos()) {
         // add it to the context
         embd.push_back(id);
-        text_to_speak += llama_token_to_str(ctx_llama_, id);
+        output_text += llama_token_to_str(ctx_llama_, id);
         printf("%s", llama_token_to_str(ctx_llama_, id));
       }
     }
@@ -314,7 +324,7 @@ std::string LLM::GetAnswer(std::vector<llama_token>& embd) {
                              last_output.length() - antiprompt.length(),
                              antiprompt.length()) != std::string::npos) {
           done = true;
-          text_to_speak = common::Replace(text_to_speak, antiprompt, "");
+          output_text = common::Replace(output_text, antiprompt, "");
           fflush(stdout);
           need_to_save_session_ = true;
           break;
@@ -324,8 +334,8 @@ std::string LLM::GetAnswer(std::vector<llama_token>& embd) {
 
     // Break to avoid infinite loop
     // TODO: Fix this bug
-    if ((int)text_to_speak.length() == last_length + 1 &&
-        text_to_speak[text_to_speak.length() - 1] == '\n') {
+    if ((int)output_text.length() == last_length + 1 &&
+        output_text[output_text.length() - 1] == '\n') {
       ++loop_count;
     } else {
       loop_count = 0;
@@ -333,8 +343,8 @@ std::string LLM::GetAnswer(std::vector<llama_token>& embd) {
     if (loop_count > 5) {
       break;
     }
-    last_length = text_to_speak.length();
+    last_length = output_text.length();
   }
 
-  return text_to_speak;
+  return output_text;
 }
diff --git a/customchar/llm/llm.h b/customchar/llm/llm.h
index 3e95b15..415781d 100644
--- a/customchar/llm/llm.h
+++ b/customchar/llm/llm.h
@@ -87,7 +87,7 @@ class LLM {
   std::vector<llama_token> Tokenize(const std::string& text, bool add_bos);
 
   /// @brief Get answer from LLM
-  std::string GetAnswer(std::vector<llama_token>& embd);
+  std::string GetAnswer(const std::string& user_input);
 };
 
 }  // namespace llm

From 7077559bd89ed7b6055613aac2a848bd65b0fa1a Mon Sep 17 00:00:00 2001
From: Viet Anh Nguyen <vietanh.dev@gmail.com>
Date: Sun, 30 Jul 2023 11:06:57 +0700
Subject: [PATCH 2/2] Add instructions to install OpenCV

---
 README.md | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index 695c274..6b0578b 100644
--- a/README.md
+++ b/README.md
@@ -67,22 +67,25 @@ Install dependencies:
 - [CMake](https://cmake.org/download/)
 - C++ 14 compiler
 
-On macOS:
+On macOS, using [Homebrew](https://brew.sh/):
 
 ```bash
 brew install sdl2 glew glfw3
+brew install opencv
 ```
 
 On Ubuntu:
 
 ```bash
 sudo apt-get install libsdl2-dev libglew-dev libglfw3-dev
+sudo apt-get install libopencv-dev
 ```
 
-On Windows:
+On Windows, using [vcpkg](https://github.com/microsoft/vcpkg) and [Git Bash](https://git-scm.com/downloads):
 
 ```bash
 vcpkg install sdl2:x64-windows glew:x64-windows glfw3:x64-windows
+vcpkg install opencv[contrib,nonfree,ffmpeg,ipp]:x64-windows --recurse
 ```
 
 Build the **CustomChar** executable: