Merge pull request #4 from vietanhdev/refactor/clean_audio_llama_apis
Clean up APIs
vietanhdev authored Jul 30, 2023
2 parents 83b8aa3 + 7077559 commit 8c5d607
Showing 6 changed files with 61 additions and 32 deletions.
README.md (7 changes: 5 additions & 2 deletions)
@@ -67,22 +67,25 @@ Install dependencies:
- [CMake](https://cmake.org/download/)
- C++ 14 compiler

On macOS:
On macOS: Using [Homebrew](https://brew.sh/)

```bash
brew install sdl2 glew glfw3
brew install opencv
```

On Ubuntu:

```bash
sudo apt-get install libsdl2-dev libglew-dev libglfw3-dev
sudo apt-get install libopencv-dev
```

On Windows:
On Windows: Using [vcpkg](https://github.com/microsoft/vcpkg) and [Git Bash](https://git-scm.com/downloads):

```bash
vcpkg install sdl2:x64-windows glew:x64-windows glfw3:x64-windows
vcpkg install opencv[contrib,nonfree,ffmpeg,ipp]:x64-windows --recurse
```

Build the **CustomChar** executable:
customchar/audio/voice_recorder.cpp (27 changes: 27 additions & 0 deletions)
@@ -32,3 +32,30 @@ void VoiceRecorder::GetAudio(std::vector<float>& result) {
audio_->Get(voice_ms, pcmf32_cur_);
result = pcmf32_cur_;
}

std::vector<float> VoiceRecorder::RecordSpeech() {
bool is_running;
std::vector<float> audio_buff;
while (true) {
// Handle Ctrl + C
is_running = audio::SDLPollEvents();
if (!is_running) {
break;
}

// Delay
std::this_thread::sleep_for(std::chrono::milliseconds(100));

// Sample audio
SampleAudio();
if (!FinishedTalking()) {
continue;
}

// Get recorded audio
GetAudio(audio_buff);
break;
}

return audio_buff;
}
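
For context, a minimal usage sketch of the new one-call recording API. Only `RecordSpeech` itself appears in this diff, so the constructor, the enclosing `customchar::audio` namespace, and any audio setup are assumptions here:

```cpp
#include <vector>

#include "customchar/audio/voice_recorder.h"

// Hypothetical caller of the new API. Assumes VoiceRecorder is
// default-constructible and that the `audio` namespace shown in
// voice_recorder.h nests under `customchar`; neither is confirmed by
// this diff.
int main() {
  customchar::audio::VoiceRecorder recorder;

  // Blocks until the speaker finishes talking (or an SDL quit event,
  // e.g. Ctrl + C, arrives), then returns the captured PCM samples.
  std::vector<float> audio_buff = recorder.RecordSpeech();

  // An empty buffer means recording was interrupted before speech ended.
  return audio_buff.empty() ? 1 : 0;
}
```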
customchar/audio/voice_recorder.h (4 changes: 4 additions & 0 deletions)
@@ -35,6 +35,10 @@ class VoiceRecorder {

/// @brief Get final audio_
void GetAudio(std::vector<float>& result);

/// @brief Record speech from user
/// @return Audio buffer from user
std::vector<float> RecordSpeech();
}; // class VoiceRecorder

} // namespace audio
customchar/character/character.cpp (27 changes: 6 additions & 21 deletions)
@@ -57,31 +57,20 @@ void Character::Run() {
break;
}

// Delay
std::this_thread::sleep_for(std::chrono::milliseconds(100));
float prob = 0.0f;
int64_t t_ms = 0;

// Sample audio
voice_recoder_->SampleAudio();
if (!voice_recoder_->FinishedTalking()) {
continue;
}

// Get recorded audio
std::vector<float> audio_buff;
voice_recoder_->GetAudio(audio_buff);
// Record speech from user
std::vector<float> audio_buff = voice_recoder_->RecordSpeech();

// Recognize speech
float prob;
int64_t t_ms;
std::string text_heard =
speech_recognizer_->Recognize(audio_buff, prob, t_ms);

// Tokenize user input
auto tokens = llm_->Tokenize(text_heard, false);

// Skip if nothing was heard
// Start over if nothing was heard
if (text_heard.empty() || tokens.empty()) {
printf("Heard nothing, skipping ...\n");
voice_recoder_->ClearAudioBuffer();
continue;
}
@@ -103,12 +92,8 @@ void Character::Run() {
// Otherwise, LLM will handle
std::string response;
if (!plugin_executor_->ParseAndExecute(text_heard, response)) {
// Append the new input tokens to the session_tokens vector
llm_->AddTokensToCurrentSession(tokens);
// Get answer from LLM
embd = llm_->Tokenize(formated_text_heard, false);
// Get answer from LLM
response = llm_->GetAnswer(embd);
response = llm_->GetAnswer(text_heard);
} else {
// TODO: Add plugin executor response to LLM session
}
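
The `else` branch above leaves a TODO: a plugin-handled response never reaches the LLM session. A possible sketch, using only the `Tokenize` and `AddTokensToCurrentSession` calls that appear elsewhere in this diff; the helper function and its wiring are hypothetical and untested against the real project:

```cpp
#include <string>

// Sketch for the TODO above: fold a plugin-handled exchange back into the
// LLM session so the model sees it on later turns. LLM is a template
// parameter only to keep this snippet self-contained; in the codebase it
// would be the llm::LLM class from llm.h.
template <typename LLM>
void AddPluginExchangeToSession(LLM& llm, const std::string& user_text,
                                const std::string& plugin_response) {
  // Store the user's utterance first, then the plugin's answer,
  // preserving conversational order.
  llm.AddTokensToCurrentSession(llm.Tokenize(user_text, /*add_bos=*/false));
  llm.AddTokensToCurrentSession(
      llm.Tokenize(plugin_response, /*add_bos=*/false));
}
```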
customchar/llm/llm.cpp (26 changes: 18 additions & 8 deletions)
@@ -181,11 +181,21 @@ void LLM::EvalModel() {
n_matching_session_tokens < (embd_inp_.size() * 3 / 4);
}

std::string LLM::GetAnswer(std::vector<llama_token>& embd) {
std::string LLM::GetAnswer(const std::string& user_input) {
// Tokenize and add the unformatted tokens to the session store
AddTokensToCurrentSession(Tokenize(user_input, false));

// Format the input and tokenize
// TODO: Do this more efficiently (reuse the tokens computed above)
std::string formated_input = user_input;
formated_input.insert(0, 1, ' ');
formated_input += "\n" + bot_name_ + chat_symb_;
std::vector<llama_token> embd = Tokenize(formated_input, false);

bool done = false;
int last_length = 0;
int loop_count = 0;
std::string text_to_speak;
std::string output_text;
while (true) {
if (embd.size() > 0) {
if (n_past_ + (int)embd.size() > n_ctx_) {
@@ -297,7 +307,7 @@ std::string LLM::GetAnswer(std::vector<llama_token>& embd) {
if (id != llama_token_eos()) {
// add it to the context
embd.push_back(id);
text_to_speak += llama_token_to_str(ctx_llama_, id);
output_text += llama_token_to_str(ctx_llama_, id);
printf("%s", llama_token_to_str(ctx_llama_, id));
}
}
@@ -314,7 +324,7 @@
last_output.length() - antiprompt.length(),
antiprompt.length()) != std::string::npos) {
done = true;
text_to_speak = common::Replace(text_to_speak, antiprompt, "");
output_text = common::Replace(output_text, antiprompt, "");
fflush(stdout);
need_to_save_session_ = true;
break;

// Break to avoid infinite loop
// TODO: Fix this bug
if ((int)text_to_speak.length() == last_length + 1 &&
text_to_speak[text_to_speak.length() - 1] == '\n') {
if ((int)output_text.length() == last_length + 1 &&
output_text[output_text.length() - 1] == '\n') {
++loop_count;
} else {
loop_count = 0;
}
if (loop_count > 5) {
break;
}
last_length = text_to_speak.length();
last_length = output_text.length();
}

return text_to_speak;
return output_text;
}
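
To make the formatting step at the top of `GetAnswer` concrete, here is a standalone illustration with hypothetical member values (`bot_name_ = "CustomChar"`, `chat_symb_ = ": "`; neither value is shown in this diff):

```cpp
#include <cassert>
#include <string>

int main() {
  // Hypothetical values; the real bot_name_ and chat_symb_ are set
  // elsewhere in llm.cpp and are not part of this diff.
  const std::string bot_name = "CustomChar";
  const std::string chat_symb = ": ";

  std::string user_input = "What time is it?";
  std::string formated_input = user_input;
  formated_input.insert(0, 1, ' ');  // prepend a single space
  formated_input += "\n" + bot_name + chat_symb;

  // The prompt ends with the bot's name, cueing the model to
  // continue the conversation as the bot.
  assert(formated_input == " What time is it?\nCustomChar: ");
  return 0;
}
```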
customchar/llm/llm.h (2 changes: 1 addition & 1 deletion)
@@ -87,7 +87,7 @@ class LLM {
std::vector<llama_token> Tokenize(const std::string& text, bool add_bos);

/// @brief Get answer from LLM
std::string GetAnswer(std::vector<llama_token>& embd);
std::string GetAnswer(const std::string& user_input);
};

} // namespace llm
