Skip to content

Commit

Permalink
Merge pull request #2 from vietanhdev/feature/video_capture
Browse files Browse the repository at this point in the history
Feature: Video capture
  • Loading branch information
vietanhdev authored Jul 29, 2023
2 parents 36c0baa + 794cef1 commit c9bb98f
Show file tree
Hide file tree
Showing 6 changed files with 191 additions and 31 deletions.
14 changes: 8 additions & 6 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,9 @@ string(STRIP "${SDL2_LIBRARIES}" SDL2_LIBRARIES)
message(STATUS "SDL2_INCLUDE_DIRS = ${SDL2_INCLUDE_DIRS}")
message(STATUS "SDL2_LIBRARIES = ${SDL2_LIBRARIES}")

# OpenCV for perception module
find_package(OpenCV REQUIRED)

# Add whisper-cpp
add_subdirectory(libs/whisper-cpp)

Expand All @@ -42,7 +45,7 @@ target_include_directories(
libs
.
)
target_link_libraries(${TARGET} PUBLIC ${SDL2_LIBRARIES} ${CMAKE_THREAD_LIBS_INIT} whisper)
target_link_libraries(${TARGET} PUBLIC ${SDL2_LIBRARIES} ${CMAKE_THREAD_LIBS_INIT} ${OpenCV_LIBS} whisper)

# CustomChar - cli
add_executable(
Expand All @@ -51,9 +54,9 @@ add_executable(
)
target_link_libraries(customchar-cli customchar-core)


option(BUILD_GUI "Build GUI" ON)
if (BUILD_GUI)

if(BUILD_GUI)
find_package(OpenGL REQUIRED)
find_package(GLEW REQUIRED)
find_package(glfw3 REQUIRED)
Expand All @@ -72,12 +75,12 @@ if (BUILD_GUI)
add_library(imgui STATIC ${IMGUI_SRCS})
target_include_directories(imgui PUBLIC ${IMGUI_DIR} ${IMGUI_DIR}/backends)

if (UNIX AND NOT APPLE)
if(UNIX AND NOT APPLE)
message(STATUS "Building for Linux")
set(LINUX_GL_LIBS GL GLEW)
target_link_libraries(${TARGET} PUBLIC ${LINUX_GL_LIBS} glfw)
target_compile_definitions(${TARGET} PUBLIC LINUX)
elseif (APPLE)
elseif(APPLE)
message(STATUS "Building for Mac OS X")
target_link_libraries(${TARGET} PUBLIC "-framework OpenGL" "-framework Cocoa" "-framework IOKit" "-framework CoreVideo" glfw)
target_compile_definitions(${TARGET} PUBLIC APPLE)
Expand All @@ -100,5 +103,4 @@ if (BUILD_GUI)
add_custom_command(TARGET customchar POST_BUILD
COMMAND ${CMAKE_COMMAND} -E copy_directory
${CMAKE_SOURCE_DIR}/fonts $<TARGET_FILE_DIR:customchar>/fonts)

endif()
1 change: 0 additions & 1 deletion customchar/audio/speech_recognizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,6 @@ std::string SpeechRecognizer::Transcribe(const std::vector<float>& pcmf32,
const int n_segments = whisper_full_n_segments(context_);
for (int i = 0; i < n_segments; ++i) {
const char* text = whisper_full_get_segment_text(context_, i);

result += text;

const int n_tokens = whisper_full_n_tokens(context_, i);
Expand Down
2 changes: 1 addition & 1 deletion customchar/audio/voice_synthesizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ using namespace CC::audio;

VoiceSynthesizer::VoiceSynthesizer() {
// Check if the Say command is supported
std::string command = "Say --version";
std::string command = "which say";
FILE* pipe = popen(command.c_str(), "r");
if (pipe == nullptr) {
printf("Failed to run command: %s\n", command.c_str());
Expand Down
87 changes: 65 additions & 22 deletions customchar/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,16 @@
#include "customchar/common/helpers.h"
#include "customchar/session/chat_history.h"
#include "customchar/session/chat_message.h"
#include "customchar/vision/video_capture.h"

#include "imgui_internal.h"
#include "imspinner/imspinner.h"

using namespace CC;
using namespace CC::character;

vision::VideoCapture video_capture;

// [Win32] Our example includes a copy of glfw3.lib pre-compiled with VS2010 to
// maximize ease of testing and compatibility with old VS compilers. To link
// with VS2010-era libraries, VS2015+ requires linking with
Expand All @@ -43,7 +46,7 @@ using namespace CC::character;
// everytime user sends message, IMGUI sets global variable to message
// signal client server ... lock/unlock mutex
constexpr int TEXT_MESSAGE_SIZE = 1024 * 8;
constexpr int INIT_WINDOW_WIDTH = 450;
constexpr int INIT_WINDOW_WIDTH = 600;
constexpr int INIT_WINDOW_HEIGHT = 400;

static void GLFWErrorCallback(int error, const char* description) {
Expand Down Expand Up @@ -102,9 +105,6 @@ void runImgui(std::shared_ptr<session::ChatHistory> history) {
ImGui::CreateContext();
ImGuiIO& io = ImGui::GetIO();
(void)io;
// io.ConfigFlags |= ImGuiConfigFlags_NavEnableKeyboard; // Enable
// Keyboard Controls io.ConfigFlags |= ImGuiConfigFlags_NavEnableGamepad; //
// Enable Gamepad Controls

// Setup Dear ImGui style
// ImGui::StyleColorsDark();
Expand All @@ -122,24 +122,23 @@ void runImgui(std::shared_ptr<session::ChatHistory> history) {

// Our state
ImVec4 clear_color = ImVec4(0.45f, 0.55f, 0.60f, 1.00f);
bool justSent = true;
bool just_sent = true;

// Initial text
char text[TEXT_MESSAGE_SIZE] = "";

GLuint texture;
glGenTextures(1, &texture);
glBindTexture(GL_TEXTURE_2D, texture);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);

bool last_enable_camera = false;
bool enable_camera = true;

// Main loop
while (!glfwWindowShouldClose(window)) {
// Poll and handle events (inputs, window resize, etc.)
// You can read the io.WantCaptureMouse, io.WantCaptureKeyboard
// flags to tell if dear imgui wants to use your inputs.
// - When io.WantCaptureMouse is true, do not dispatch mouse input
// data to your main application, or clear/overwrite your copy of
// the mouse data.
// - When io.WantCaptureKeyboard is true, do not dispatch keyboard
// input data to your main application, or clear/overwrite your copy
// of the keyboard data. Generally you may always pass all inputs to
// dear imgui, and hide them from your application based on those
// two flags
glfwPollEvents();

// Start the Dear ImGui frame
Expand All @@ -158,6 +157,50 @@ void runImgui(std::shared_ptr<session::ChatHistory> history) {
ImGuiWindowFlags_NoResize | ImGuiWindowFlags_NoMove |
ImGuiWindowFlags_NoCollapse | ImGuiWindowFlags_NoTitleBar);

ImVec2 window_size = ImGui::GetWindowSize();

// Check and start/stop camera
if (last_enable_camera != enable_camera) {
if (enable_camera) {
video_capture.Start();
// Adapt window height to camera aspect ratio
int window_width = window_size.x;
int window_height = window_width * video_capture.GetFrameHeight() /
video_capture.GetFrameWidth() +
200;
glfwSetWindowSize(window, window_width, window_height);
} else {
video_capture.Stop();
glfwSetWindowSize(window, INIT_WINDOW_WIDTH, INIT_WINDOW_HEIGHT);
}
last_enable_camera = enable_camera;
}

// Render camera
if (enable_camera) {
// Resize image to fit window
cv::Mat image = video_capture.GetFrame();
if (!image.empty()) {
cv::Mat resized_image;
float ratio = (float)image.cols / (float)image.rows;
int new_width = window_size.x - 20;
int new_height = new_width / ratio;
cv::resize(image, resized_image, cv::Size(new_width, new_height));
cv::cvtColor(resized_image, resized_image, cv::COLOR_BGR2RGBA);

// Display image
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, resized_image.cols,
resized_image.rows, 0, GL_RGBA, GL_UNSIGNED_BYTE,
resized_image.data);
ImGui::Image(reinterpret_cast<void*>(static_cast<intptr_t>(texture)),
ImVec2(resized_image.cols, resized_image.rows),
ImVec2(0, 0), ImVec2(1, 1), ImColor(255, 255, 255, 255),
ImColor(255, 255, 255, 128));
}
}

ImGui::Checkbox("Enable Camera", &enable_camera);

// Child window scrollable area
ImGuiWindowFlags window_flags = ImGuiWindowFlags_None;

Expand All @@ -176,7 +219,7 @@ void runImgui(std::shared_ptr<session::ChatHistory> history) {
ImGui::TextWrapped("> %s: %s", message.GetSender().c_str(),
message.GetMessage().c_str());
}
if (history->HasNewMessage() || justSent) {
if (history->HasNewMessage() || just_sent) {
ImGui::SetScrollHereY(1.0f);
}

Expand All @@ -188,9 +231,9 @@ void runImgui(std::shared_ptr<session::ChatHistory> history) {
ImGuiInputTextFlags input_flags = ImGuiInputTextFlags_ReadOnly;

// Refocus text area if text was just sent
if (justSent) {
if (just_sent) {
ImGui::SetKeyboardFocusHere();
justSent = false;
just_sent = false;
}

// Create a spinner and text input in the same line
Expand All @@ -204,11 +247,12 @@ void runImgui(std::shared_ptr<session::ChatHistory> history) {
strcpy(text, "Say something...");
ImGui::PushItemWidth(ImGui::GetContentRegionAvail().x);
if (ImGui::InputText("##source", text, IM_ARRAYSIZE(text), input_flags)) {
justSent = OnNewMessage(text, "User", history);
just_sent = OnNewMessage(text, "User", history);
};

// Put the cursor of InputTextMultiline at the end of the text
ImGui::SetKeyboardFocusHere();
ImGui::End();

// Rendering
ImGui::Render();
Expand All @@ -223,8 +267,7 @@ void runImgui(std::shared_ptr<session::ChatHistory> history) {
glfwSwapBuffers(window);
}

std::cout << "Main ImGUI loop ended" << std::endl;

glDeleteTextures(1, &texture);
ImGui_ImplOpenGL3_Shutdown();
ImGui_ImplGlfw_Shutdown();
ImGui::DestroyContext();
Expand Down
116 changes: 116 additions & 0 deletions customchar/vision/video_capture.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
#ifndef CUSTOMCHAR_VISION_VIDEO_CAPTURE_H_
#define CUSTOMCHAR_VISION_VIDEO_CAPTURE_H_

#include <iostream>
#include <mutex>
#include <queue>
#include <string>
#include <thread>
#include <vector>

#include <opencv2/opencv.hpp>

namespace CC {
namespace vision {

/// @brief Background camera grabber that keeps only the most recent frame.
///
/// A worker thread (running Capture()) reads frames from a cv::VideoCapture
/// device and stores a copy of the latest one in frame_. Readers obtain a copy
/// through GetFrame().
///
/// Locking: capture_mutex_ guards capture_ and is_capturing_; frame_mutex_
/// guards frame_. When both are needed, capture_mutex_ is taken first.
///
/// Start() and Stop() manage capture_thread_ without extra synchronization, so
/// they are expected to be called from a single controlling thread (e.g. the
/// UI thread). GetFrame()/GetFrameWidth()/GetFrameHeight() only take
/// frame_mutex_ and may be called while the worker is running.
class VideoCapture {
 private:
  /// Index of the capture device passed to cv::VideoCapture.
  int device_id_ = 0;
  /// True while the worker is expected to keep grabbing frames.
  /// Guarded by capture_mutex_.
  bool is_capturing_ = false;

  cv::VideoCapture capture_;
  std::mutex capture_mutex_;

  cv::Mat frame_;
  std::mutex frame_mutex_;
  std::thread capture_thread_;

  /// @brief Open the capture device identified by device_id_.
  /// @return true if the device was opened, false otherwise.
  /// @note Caller must hold capture_mutex_.
  bool StartDevice() {
    // Release a handle left open by a previous session before re-opening, so
    // the same device is never opened twice at once.
    StopDevice();
    capture_ = cv::VideoCapture(device_id_);
    if (!capture_.isOpened()) {
      std::cerr << "Error opening video stream or file" << std::endl;
      return false;
    }
    return true;
  }

  /// @brief Release the capture device if it is open.
  /// @note Caller must hold capture_mutex_.
  void StopDevice() {
    if (capture_.isOpened()) capture_.release();
  }

 public:
  /// @brief Create an idle capture object; no device is opened until Start().
  /// @param device_id Index of the camera to open (defaults to 0).
  explicit VideoCapture(int device_id = 0) : device_id_(device_id) {}

  /// @brief Stop the worker and release the device.
  /// Destroying a joinable std::thread calls std::terminate, so the worker
  /// must be joined before the members are destroyed.
  ~VideoCapture() { Stop(); }

  // The class owns a thread and mutexes, so it is neither copyable nor
  // assignable.
  VideoCapture(const VideoCapture&) = delete;
  VideoCapture& operator=(const VideoCapture&) = delete;

  /// @brief Get frame width
  /// @return Column count of the most recently stored frame (frame_.cols).
  int GetFrameWidth() {
    std::lock_guard<std::mutex> lock(frame_mutex_);
    return frame_.cols;
  }

  /// @brief Get frame height
  /// @return Row count of the most recently stored frame (frame_.rows).
  int GetFrameHeight() {
    std::lock_guard<std::mutex> lock(frame_mutex_);
    return frame_.rows;
  }

  /// @brief Worker loop: grab frames until stopped or the device runs dry.
  ///
  /// Exits when is_capturing_ is cleared (by Stop()) or when the device
  /// yields an empty frame. In the latter case is_capturing_ is cleared here
  /// so that a later Start() can restart the capture.
  void Capture() {
    cv::Mat frame;
    while (true) {
      {
        std::lock_guard<std::mutex> lock(capture_mutex_);
        if (!is_capturing_) {
          break;
        }
        capture_ >> frame;
        if (frame.empty()) {
          is_capturing_ = false;
          break;
        }
      }
      // Publish a deep copy: `frame` is reused by the next read.
      std::lock_guard<std::mutex> lock(frame_mutex_);
      frame_ = frame.clone();
    }
  }

  /// @brief Start capturing frames
  ///
  /// No-op if a capture is already running. Otherwise opens the device, reads
  /// one frame synchronously so the frame size is available immediately, and
  /// launches the worker thread. Does nothing further if the device cannot be
  /// opened.
  void Start() {
    {
      std::lock_guard<std::mutex> lock(capture_mutex_);
      if (is_capturing_) {
        return;
      }
    }
    // A previous worker may have finished on its own (empty frame) without
    // being joined. Join it outside capture_mutex_ before reusing the thread
    // object; assigning to a joinable std::thread calls std::terminate.
    if (capture_thread_.joinable()) {
      capture_thread_.join();
    }
    {
      std::lock_guard<std::mutex> lock(capture_mutex_);
      if (!StartDevice()) return;
      // Get first frame to initialize frame size
      std::lock_guard<std::mutex> frame_lock(frame_mutex_);
      capture_ >> frame_;
      is_capturing_ = true;
    }
    capture_thread_ = std::thread(&VideoCapture::Capture, this);
  }

  /// @brief Stop capturing frames
  ///
  /// Signals the worker to exit, waits for it, then releases the device.
  /// Safe to call when no capture is running.
  void Stop() {
    {
      std::lock_guard<std::mutex> lock(capture_mutex_);
      is_capturing_ = false;
    }
    // Join without holding capture_mutex_: the worker needs that mutex to
    // observe the cleared flag and leave its loop.
    if (capture_thread_.joinable()) {
      capture_thread_.join();
    }
    std::lock_guard<std::mutex> lock(capture_mutex_);
    StopDevice();
  }

  /// @brief Get a copy of the most recently captured frame
  /// @return cv::Mat deep copy of the stored frame; empty if no frame has
  /// been stored yet.
  cv::Mat GetFrame() {
    std::lock_guard<std::mutex> lock(frame_mutex_);
    return frame_.clone();
  }
};

} // namespace vision
} // namespace CC

#endif // CUSTOMCHAR_VISION_VIDEO_CAPTURE_H_
2 changes: 1 addition & 1 deletion docs/architecture.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.

0 comments on commit c9bb98f

Please sign in to comment.