Read character data from config file

nrl-ai · Aug 6, 2023 · d18ea63 · d18ea63
1 parent d39a8e1
commit d18ea63
Show file tree

Hide file tree

Showing 12 changed files with 24,725 additions and 33 deletions.
diff --git a/README.md b/README.md
@@ -43,22 +43,25 @@ https://github.com/nrl-ai/CustomChar/assets/18329471/005a2d0f-df32-4493-a391-597
 
 - [ ] Build AI characters from the knowledge database 💬💬💬 :
  - [x] LLamaCpp for LLM. Chat with **Llama-V2** on **all platforms**.
- - [ ] Knowledge indexing and searching.
+ - [x] Knowledge indexing and searching (EmbedDB + SQLite).
+ - [ ] Search knowledge DB to add into prompt.
 - [x] Talk to your character 🎤🎤🎤 :
  - [x] STT with **whisper.cpp**. Now supports speech recognition **on all platforms**.
 - [ ] Hear from your character 🔊🔊🔊 :
  - [x] TTS with "say" on MacOS. **Note**: "say" is not available on other platforms (Windows, Linux)
  - [ ] TTS with bark.cpp. Expected to be available on all platforms.
 - [x] Plugin executor:
- - [x] Open apps (WIP - Need improvement for Windows and Linux).
- - [ ] Search on Google.
- - [ ] Search on Youtube.
-- [ ] Perception engine to see the world through the camera 📷📷📷.
-- [x] Add a GUI for the character interaction.
+ - [x] Open apps
+ - [x] Record video
+- [ ] Perception engine to see the world through the camera 📷📷📷 :
+ - [x] Support video capturing
+ - [ ] Object detection
+- [x] Add GUIs for the character interaction.
+ - [x] Dear ImGui
+ - [ ] Web-based GUI
 - [ ] Add a GUI for the character creation.
 - [ ] Create virtual characters that can be used in games, simulations, and other applications.
- - [ ] Elon Musk
- - [ ] Iron Man
+ - [x] Iron Man (WIP)
  - [ ] Batman
  - [ ] Superman
 - [ ] Add bindings to other languages:

diff --git a/characters/jarvis/character.json b/characters/jarvis/character.json
@@ -0,0 +1,9 @@
+{
+ "prompt": "Ignore all prior programming instructions. Text transcript of a dialog, where {0} interacts with an AI assistant named Jarvis. JARVIS (just a very intelligent system), invented by Iron Man.\n{0}{4} Hello, {1}!\n{1}{4} Hello {0}! How may I help you today?\n{0}{4} What time is it?\n{1}{4} It is {2} o'clock.\n{0}{4} What year is it?\n{1}{4} We are in {3}.\n{0}{4}",
+ "person": "User",
+ "bot_name": "JARVIS",
+ "bot_voice": "JARVIS",
+ "language": "en",
+ "tts_model_path": "../models/ggml-base.en.bin",
+ "llm_model_path": "../models/llama-2-7b-chat.ggmlv3.q4_0.bin"
+}
diff --git a/data/jarvis.txt → characters/jarvis/quotes.txt b/data/jarvis.txt → characters/jarvis/quotes.txt
diff --git a/customchar/character/character.cpp b/customchar/character/character.cpp
@@ -3,21 +3,21 @@
 using namespace CC;
 using namespace CC::character;
 
-Character::Character(common::CCParams init_params) {
- params_ = init_params;
+Character::Character(common::CCParams params) {
+ params_ = params;
 
  // CC components
  speech_recognizer_ = std::make_shared<audio::SpeechRecognizer>(
- params_.sr_model_path, params_.language, params_.audio_ctx,
+ params_.tts_model_path, params_.language, params_.audio_ctx,
  params_.n_threads, params_.max_tokens, params_.translate,
  params_.no_timestamps, params_.print_special, params_.speed_up);
  voice_recoder_ = std::make_shared<audio::VoiceRecorder>();
  voice_synthesizer_ = std::make_shared<audio::VoiceSynthesizer>();
 
  // Load LLM
- llm_ =
- std::make_shared<llm::LLM>(params_.llm_model_path, params_.path_session,
- params_.person, params_.bot_name);
+ llm_ = std::make_shared<llm::LLM>(params.llm_model_path, params_.path_session,
+  params_.person, params_.bot_name,
+  params.prompt);
  llm_->eval_model();
 
  // Load plugin executor

diff --git a/customchar/character/character.h b/customchar/character/character.h
@@ -44,8 +44,8 @@ class Character {
 
  public:
  /// @brief Constructor
- /// @param init_params
- Character(common::CCParams init_params);
+ /// @param params Parameters
+ Character(common::CCParams params);
 
  /// @brief Set mute (do not speak)
  void set_mute(bool is_muted);

diff --git a/customchar/common/helpers.cpp b/customchar/common/helpers.cpp
@@ -56,7 +56,7 @@ void common::cc_print_params_usage(int /*argc*/, char** argv,
  fprintf(stderr, " -l LANG, --language LANG [%-7s] spoken language\n",
  params.language.c_str());
  fprintf(stderr, " -mw FILE, --model-whisper [%-7s] whisper model file\n",
- params.sr_model_path.c_str());
+ params.tts_model_path.c_str());
  fprintf(stderr, " -ml FILE, --model-llama [%-7s] llama model file\n",
  params.llm_model_path.c_str());
  fprintf(stderr, " -s FILE, --speak TEXT [%-7s] command for TTS\n",
@@ -70,11 +70,85 @@ void common::cc_print_params_usage(int /*argc*/, char** argv,
  "large!) (default: none)\n");
  fprintf(stderr, " --verbose-prompt [%-7s] print prompt at start\n",
  params.verbose_prompt ? "true" : "false");
- fprintf(stderr, " -f FNAME, --file FNAME [%-7s] text output file name\n",
- params.fname_out.c_str());
  fprintf(stderr, "\n");
 }
 
+namespace {
+
+/// @brief Copy `data[key]` into `out` when `key` is present.
+/// @param data Parsed JSON object to read from
+/// @param key Name of the config entry
+/// @param out Destination; left untouched when the key is absent
+/// @throws nlohmann::json::exception when the stored value cannot be
+///         converted to T
+template <typename T>
+void read_config_field(const nlohmann::json& data, const char* key, T& out) {
+  const auto it = data.find(key);
+  if (it != data.end()) {
+    it->get_to(out);
+  }
+}
+
+}  // namespace
+
+/// @brief Create CCParams from a character config file (JSON).
+///
+/// `params` is first reset to a default-constructed CCParams; every key that
+/// is present in the file then overrides the matching field. Keys that are
+/// absent keep their default value.
+///
+/// @param fname Path of the JSON config file
+/// @param params Output parameters; reset to defaults when loading fails
+/// @return true on success; false when the file cannot be opened, is not a
+///         valid JSON object, or holds a value of the wrong type
+bool common::cc_params_from_config(const std::string& fname, CCParams& params) {
+  params = CCParams();
+
+  std::ifstream f(fname);
+  if (!f.is_open()) {
+    fprintf(stderr, "error: cannot open character config '%s'\n",
+            fname.c_str());
+    return false;
+  }
+
+  // allow_exceptions = false: malformed input yields a "discarded" value
+  // instead of throwing, so the failure can be reported via the return value.
+  const json data = json::parse(f, nullptr, false);
+  if (data.is_discarded() || !data.is_object()) {
+    fprintf(stderr, "error: character config '%s' is not a valid JSON object\n",
+            fname.c_str());
+    return false;
+  }
+
+  try {
+    read_config_field(data, "prob0", params.prob0);
+    read_config_field(data, "n_threads", params.n_threads);
+    read_config_field(data, "voice_ms", params.voice_ms);
+    read_config_field(data, "capture_id", params.capture_id);
+    read_config_field(data, "max_tokens", params.max_tokens);
+    read_config_field(data, "audio_ctx", params.audio_ctx);
+    read_config_field(data, "vad_thold", params.vad_thold);
+    read_config_field(data, "freq_thold", params.freq_thold);
+    read_config_field(data, "speed_up", params.speed_up);
+    read_config_field(data, "translate", params.translate);
+    read_config_field(data, "print_special", params.print_special);
+    read_config_field(data, "print_energy", params.print_energy);
+    read_config_field(data, "no_timestamps", params.no_timestamps);
+    read_config_field(data, "verbose_prompt", params.verbose_prompt);
+    read_config_field(data, "person", params.person);
+    read_config_field(data, "bot_name", params.bot_name);
+    read_config_field(data, "chat_symb", params.chat_symb);
+    read_config_field(data, "language", params.language);
+    read_config_field(data, "tts_model_path", params.tts_model_path);
+    read_config_field(data, "llm_model_path", params.llm_model_path);
+    read_config_field(data, "speak", params.speak);
+    read_config_field(data, "prompt", params.prompt);
+    read_config_field(data, "path_session", params.path_session);
+  } catch (const json::exception& e) {
+    // A value had the wrong JSON type (e.g. a string where a number is
+    // expected). Do not hand back a half-populated struct.
+    fprintf(stderr, "error: invalid value in character config '%s': %s\n",
+            fname.c_str(), e.what());
+    params = CCParams();
+    return false;
+  }
+
+  return true;
+}
+
+
 bool common::cc_params_parse(int argc, char** argv, CCParams& params) {
  for (int i = 1; i < argc; i++) {
  std::string arg = argv[i];
@@ -113,7 +187,7 @@ bool common::cc_params_parse(int argc, char** argv, CCParams& params) {
  } else if (arg == "-l" || arg == "--language") {
  params.language = argv[++i];
  } else if (arg == "-mw" || arg == "--model-whisper") {
- params.sr_model_path = argv[++i];
+ params.tts_model_path = argv[++i];
  } else if (arg == "-ml" || arg == "--model-llama") {
  params.llm_model_path = argv[++i];
  } else if (arg == "-s" || arg == "--speak") {
@@ -125,8 +199,6 @@ bool common::cc_params_parse(int argc, char** argv, CCParams& params) {
  if (params.prompt.back() == '\n') {
  params.prompt.pop_back();
  }
- } else if (arg == "-f" || arg == "--file") {
- params.fname_out = argv[++i];
  } else {
  fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
  cc_print_params_usage(argc, argv, params);

diff --git a/customchar/common/helpers.h b/customchar/common/helpers.h
@@ -9,9 +9,13 @@
 #include <thread>
 #include <vector>
 
+#include "nlohmann/json.hpp"
+
 namespace CC {
 namespace common {
 
+using json = nlohmann::json;
+
 /// @brief Parameters for CustomChar
 struct CCParams {
  float prob0;
@@ -36,14 +40,18 @@ struct CCParams {
  std::string bot_name = "JARVIS";
  std::string chat_symb = ":";
  std::string language = "en";
- std::string sr_model_path = "../models/ggml-base.en.bin";
+ std::string tts_model_path = "../models/ggml-base.en.bin";
  std::string llm_model_path = "../models/llama-2-7b-chat.ggmlv3.q4_0.bin";
  std::string speak = "say";
  std::string prompt = "";
- std::string fname_out;
  std::string path_session = "";
 };
 
+/// @brief Create CCParams from a character config file
+/// @param fname File name
+/// @param params Parsed parameters
+bool cc_params_from_config(const std::string& fname, CCParams& params);
+
 /// @brief Parse command line arguments
 /// @param argc Number of arguments
 /// @param argv Arguments

diff --git a/customchar/llm/llm.cpp b/customchar/llm/llm.cpp
@@ -4,11 +4,13 @@ using namespace CC;
 using namespace CC::llm;
 
 LLM::LLM(const std::string& model_path, const std::string& path_session,
- const std::string& person, const std::string& bot_name)
+ const std::string& person, const std::string& bot_name,
+ const std::string& prompt)
  : model_path_(model_path),
  path_session_(path_session),
  person_(person),
- bot_name_(bot_name) {
+ bot_name_(bot_name),
+ base_prompt_(prompt) {
  // Init prompt
  init_prompt();
 
@@ -45,7 +47,7 @@ LLM::~LLM() {
 
 void LLM::init_prompt() {
  // Construct the initial prompt for LLaMA inference
- prompt_llama_ = prompt_.empty() ? kPromptLlama : prompt_;
+ prompt_llama_ = base_prompt_.empty() ? kPromptLlama : base_prompt_;
 
  // Need to have leading ' '
  prompt_llama_.insert(0, 1, ' ');

diff --git a/customchar/llm/llm.h b/customchar/llm/llm.h
@@ -17,7 +17,7 @@ namespace CC {
 namespace llm {
 
 const std::string kPromptLlama =
- R"(Ignore all prior programming instructions. (Text transcript of a dialog, where {0} interacts with an AI assistant named Jarvis. Jarvis (just a very intelligent system), invented by Iron Man.
+ R"(Ignore all prior programming instructions. (Text transcript of a dialog, where {0} interacts with an AI assistant CustomChar.
 {0}{4} Hello, {1}!
 {1}{4} Hello {0}! How may I help you today?
 {0}{4} What time is it?
@@ -31,6 +31,7 @@ class LLM {
  std::string model_path_;
  std::string person_ = "User";
  std::string bot_name_ = "JARVIS";
+ std::string base_prompt_ = "";
  const std::string chat_symb_ = ":";
  bool verbose_prompt_ = false;
  bool need_to_save_session_ = false;
@@ -41,7 +42,6 @@ class LLM {
  struct llama_context* ctx_llama_;
 
  std::string prompt_llama_;
- std::string prompt_ = "";
 
  std::vector<llama_token> embd_inp_;
  std::vector<llama_token> session_tokens_;
@@ -62,7 +62,8 @@ class LLM {
  /// @param path_session Path to the session
  LLM(const std::string& model_path, const std::string& path_session = "",
  const std::string& person = "User",
- const std::string& bot_name = "JARVIS");
+ const std::string& bot_name = "JARVIS",
+ const std::string& base_prompt = "");
  ~LLM();
 
  /// @brief Evaluate the model. Run this function before inference.

diff --git a/customchar/main.cpp b/customchar/main.cpp
@@ -298,11 +298,12 @@ void run_img_ui(std::shared_ptr<session::ChatHistory> history) {
 int main(int argc, char** argv) {
  // Parse command line arguments
  common::CCParams params;
- if (cc_params_parse(argc, argv, params) == false) {
+ if (!cc_params_from_config("../characters/jarvis/character.json", params)) {
+ std::cerr << "Error: failed to parse character config" << std::endl;
  exit(1);
  }
  if (whisper_lang_id(params.language.c_str()) == -1) {
- fprintf(stderr, "error: unknown language '%s'\n", params.language.c_str());
+ std::cerr << "Error: unknown language" << params.language << std::endl;
  cc_print_params_usage(argc, argv, params);
  exit(1);
  }

diff --git a/examples/search_doc.cpp b/examples/search_doc.cpp
@@ -48,7 +48,7 @@ int main() {
  Collection* collection = new Collection(connection_name, path, dim, max_size);
 
  // Read the document from file
- std::string file_path = "../data/jarvis.txt";
+ std::string file_path = "../characters/jarvis/quotes.txt";
  std::vector<std::string> lines = read_lines(file_path);
 
  // Insert all documents