From b6a9feb23aeaa407500b779c30165073775551b7 Mon Sep 17 00:00:00 2001
From: Erick Fuentes
Date: Mon, 2 Dec 2024 21:06:33 -0500
Subject: [PATCH] Nbuono/vo (#332)

Co-authored-by: Nicolaniello Buono
---
 WORKSPACE                                     |   1 -
 experimental/learn_descriptors/BUILD          |  18 +++
 .../learn_descriptors/visual_odometry.cc      | 118 ++++++++++++++++++
 .../learn_descriptors/visual_odometry.hh      |  58 +++++++++
 .../learn_descriptors/visual_odometry_test.cc | 112 +++++++++++++++++
 third_party/BUILD.cpp_fast_csv_parser         |   6 +
 6 files changed, 312 insertions(+), 1 deletion(-)
 create mode 100644 experimental/learn_descriptors/visual_odometry.cc
 create mode 100644 experimental/learn_descriptors/visual_odometry.hh
 create mode 100644 experimental/learn_descriptors/visual_odometry_test.cc
 create mode 100644 third_party/BUILD.cpp_fast_csv_parser

diff --git a/WORKSPACE b/WORKSPACE
index 4a459fd2..3ba69c91 100644
--- a/WORKSPACE
+++ b/WORKSPACE
@@ -245,7 +245,6 @@ http_archive(
     ],
     patch_args=["-p1"],
 )
-
 load("@com_google_protobuf//:protobuf_deps.bzl", "protobuf_deps")
 protobuf_deps()
 
diff --git a/experimental/learn_descriptors/BUILD b/experimental/learn_descriptors/BUILD
index d03c462c..e0f417bd 100644
--- a/experimental/learn_descriptors/BUILD
+++ b/experimental/learn_descriptors/BUILD
@@ -16,6 +16,15 @@ cc_test(
     ]
 )
 
+cc_library(
+    name = "visual_odometry",
+    hdrs = ["visual_odometry.hh"],
+    visibility = ["//visibility:public"],
+    srcs = ["visual_odometry.cc"],
+    deps = [
+        "@opencv//:opencv"
+    ]
+)
 
 cc_library(
     name = "symphony_lake_parser",
@@ -29,6 +38,15 @@ cc_library(
     ]
 )
 
+cc_test(
+    name = "visual_odometry_test",
+    srcs = ["visual_odometry_test.cc"],
+    deps = [
+        "@com_google_googletest//:gtest_main",
+        ":visual_odometry"
+    ]
+)
+
 cc_test(
     name = "symphony_lake_parser_test",
     srcs = ["symphony_lake_parser_test.cc"],
diff --git a/experimental/learn_descriptors/visual_odometry.cc b/experimental/learn_descriptors/visual_odometry.cc
new file mode 100644
index 00000000..36047320
--- /dev/null
+++ b/experimental/learn_descriptors/visual_odometry.cc
@@ -0,0 +1,118 @@
+#include "experimental/learn_descriptors/visual_odometry.hh"
+
+namespace robot::experimental::learn_descriptors {
+VisualOdometry::VisualOdometry(Frontend::ExtractorType frontend_extractor,
+                               Frontend::MatcherType frontend_matcher) {
+    frontend_ = Frontend(frontend_extractor, frontend_matcher);
+}
+
+Frontend::Frontend(ExtractorType frontend_algorithm, MatcherType frontend_matcher) {
+    extractor_type_ = frontend_algorithm;
+    matcher_type_ = frontend_matcher;
+
+    switch (extractor_type_) {
+        case ExtractorType::SIFT:
+            feature_extractor_ = cv::SIFT::create();
+            break;
+        case ExtractorType::ORB:
+            feature_extractor_ = cv::ORB::create();
+            break;
+        default:
+            // Error handling needed?
+            break;
+    }
+    switch (matcher_type_) {
+        case MatcherType::BRUTE_FORCE:
+            descriptor_matcher_ = cv::BFMatcher::create(cv::NORM_L2);
+            break;
+        case MatcherType::KNN:
+            descriptor_matcher_ = cv::BFMatcher::create(cv::NORM_L2);
+            break;
+        case MatcherType::FLANN:
+            if (frontend_algorithm == ExtractorType::ORB) {
+                throw std::invalid_argument("FLANN can not be used with ORB.");
+            }
+            descriptor_matcher_ = cv::FlannBasedMatcher::create();
+            break;
+        default:
+            // Error handling needed?
+            break;
+    }
+}
+
+std::pair<std::vector<cv::KeyPoint>, cv::Mat> Frontend::get_keypoints_and_descriptors(
+    const cv::Mat &img) const {
+    std::vector<cv::KeyPoint> keypoints;
+    cv::Mat descriptors;
+    switch (extractor_type_) {
+        default:  // the opencv extractors have the same function signature
+            feature_extractor_->detectAndCompute(img, cv::noArray(), keypoints, descriptors);
+            break;
+    }
+    return std::pair<std::vector<cv::KeyPoint>, cv::Mat>(keypoints, descriptors);
+}
+
+std::vector<cv::DMatch> Frontend::get_matches(const cv::Mat &descriptors1,
+                                              const cv::Mat &descriptors2) const {
+    std::vector<cv::DMatch> matches;
+    switch (matcher_type_) {
+        case MatcherType::BRUTE_FORCE:
+            get_brute_matches(descriptors1, descriptors2, matches);
+            break;
+        case MatcherType::KNN:
+            get_KNN_matches(descriptors1, descriptors2, matches);
+            break;
+        case MatcherType::FLANN:
+            get_FLANN_matches(descriptors1, descriptors2, matches);
+            break;
+        default:
+            break;
+    }
+    std::sort(matches.begin(), matches.end());
+    return matches;
+}
+
+bool Frontend::get_brute_matches(const cv::Mat &descriptors1, const cv::Mat &descriptors2,
+                                 std::vector<cv::DMatch> &matches_out) const {
+    if (matcher_type_ != MatcherType::BRUTE_FORCE) {
+        return false;
+    }
+    matches_out.clear();
+    descriptor_matcher_->match(descriptors1, descriptors2, matches_out);
+    return true;
+}
+
+bool Frontend::get_KNN_matches(const cv::Mat &descriptors1, const cv::Mat &descriptors2,
+                               std::vector<cv::DMatch> &matches_out) const {
+    if (matcher_type_ != MatcherType::KNN) {
+        return false;
+    }
+    std::vector<std::vector<cv::DMatch>> knn_matches;
+    descriptor_matcher_->knnMatch(descriptors1, descriptors2, knn_matches, 2);
+    const float ratio_thresh = 0.7f;
+    matches_out.clear();
+    for (size_t i = 0; i < knn_matches.size(); i++) {
+        if (knn_matches[i][0].distance < ratio_thresh * knn_matches[i][1].distance) {
+            matches_out.push_back(knn_matches[i][0]);
+        }
+    }
+    return true;
+}
+
+bool Frontend::get_FLANN_matches(const cv::Mat &descriptors1, const cv::Mat &descriptors2,
+                                 std::vector<cv::DMatch> &matches_out) const {
+    if (matcher_type_ != MatcherType::FLANN) {
+        return false;
+    }
+    std::vector<std::vector<cv::DMatch>> knn_matches;
+    descriptor_matcher_->knnMatch(descriptors1, descriptors2, knn_matches, 2);
+    const float ratio_thresh = 0.7f;
+    matches_out.clear();
+    for (size_t i = 0; i < knn_matches.size(); i++) {
+        if (knn_matches[i][0].distance < ratio_thresh * knn_matches[i][1].distance) {
+            matches_out.push_back(knn_matches[i][0]);
+        }
+    }
+    return true;
+}
+}  // namespace robot::experimental::learn_descriptors
\ No newline at end of file
diff --git a/experimental/learn_descriptors/visual_odometry.hh b/experimental/learn_descriptors/visual_odometry.hh
new file mode 100644
index 00000000..b64434d2
--- /dev/null
+++ b/experimental/learn_descriptors/visual_odometry.hh
@@ -0,0 +1,58 @@
+#pragma once
+
+#include <opencv2/opencv.hpp>
+
+namespace robot::experimental::learn_descriptors {
+class Frontend {
+   public:
+    enum class ExtractorType { SIFT, ORB };
+    enum class MatcherType { BRUTE_FORCE, KNN, FLANN };
+
+    Frontend(){};
+    Frontend(ExtractorType frontend_extractor, MatcherType frontend_matcher);
+    ~Frontend(){};
+
+    ExtractorType get_extractor_type() const { return extractor_type_; };
+    MatcherType get_matcher_type() const { return matcher_type_; };
+
+    std::pair<std::vector<cv::KeyPoint>, cv::Mat> get_keypoints_and_descriptors(
+        const cv::Mat &img) const;
+    std::vector<cv::DMatch> get_matches(const cv::Mat &descriptors1,
+                                        const cv::Mat &descriptors2) const;
+
+    static void draw_keypoints(const cv::Mat &img, std::vector<cv::KeyPoint> keypoints,
+                               cv::Mat img_keypoints_out) {
+        cv::drawKeypoints(img, keypoints, img_keypoints_out, cv::Scalar::all(-1),
+                          cv::DrawMatchesFlags::DRAW_RICH_KEYPOINTS);
+    }
+    static void draw_matches(const cv::Mat &img1, std::vector<cv::KeyPoint> keypoints1,
+                             const cv::Mat &img2, std::vector<cv::KeyPoint> keypoints2,
+                             std::vector<cv::DMatch> matches, cv::Mat img_matches_out) {
+        cv::drawMatches(img1, keypoints1, img2, keypoints2, matches, img_matches_out);
+    }
+
+   private:
+    bool get_brute_matches(const cv::Mat &descriptors1, const cv::Mat &descriptors2,
+                           std::vector<cv::DMatch> &matches_out) const;
+    bool get_KNN_matches(const cv::Mat &descriptors1, const cv::Mat &descriptors2,
+                         std::vector<cv::DMatch> &matches_out) const;
+    bool get_FLANN_matches(const cv::Mat &descriptors1, const cv::Mat &descriptors2,
+                           std::vector<cv::DMatch> &matches_out) const;
+    ExtractorType extractor_type_;
+    MatcherType matcher_type_;
+
+    cv::Ptr<cv::Feature2D> feature_extractor_;
+    cv::Ptr<cv::DescriptorMatcher> descriptor_matcher_;
+};
+class VisualOdometry {
+   public:
+    VisualOdometry(Frontend::ExtractorType frontend_extractor,
+                   Frontend::MatcherType frontend_matcher = Frontend::MatcherType::KNN);
+    ~VisualOdometry(){};
+
+   private:
+    cv::Mat prev_image_;
+
+    Frontend frontend_;
+};
+}  // namespace robot::experimental::learn_descriptors
\ No newline at end of file
diff --git a/experimental/learn_descriptors/visual_odometry_test.cc b/experimental/learn_descriptors/visual_odometry_test.cc
new file mode 100644
index 00000000..b1803317
--- /dev/null
+++ b/experimental/learn_descriptors/visual_odometry_test.cc
@@ -0,0 +1,112 @@
+#include "experimental/learn_descriptors/visual_odometry.hh"
+
+#include <cassert>
+#include <string>
+
+#include "gtest/gtest.h"
+
+namespace robot::experimental::learn_descriptors {
+TEST(VIO_TEST, frontend_pipeline_sweep) {
+    const size_t width = 640;
+    const size_t height = 480;
+
+    const size_t pixel_shift_x = 20;
+    const size_t PIXEL_COMP_TOL = 20;
+
+    cv::Mat image_1 = cv::Mat::zeros(height, width, CV_8UC3);
+    cv::Mat image_2;
+
+    cv::Mat translation_mat = (cv::Mat_<double>(2, 3) << 1, 0, pixel_shift_x, 0, 1, 0);
+    cv::Point rect_points[4] = {{150, 200}, {350, 200}, {350, 300}, {150, 300}};
+    float com_x = 0.0f, com_y = 0.0f;
+    for (const cv::Point& rect_point : rect_points) {
+        com_x += rect_point.x;
+        com_y += rect_point.y;
+    }
+    com_x *= 0.25;
+    com_y *= 0.25;
+    cv::Point rotation_center(com_x, com_y);
+    cv::Mat rotation_matrix = cv::getRotationMatrix2D(rotation_center, 45, 1.0);
+
+    const size_t line_spacing = 100;
+    for (size_t i = 0; i <= width / line_spacing; i++) {
+        size_t x = i * line_spacing + (width % line_spacing) / 2;
+        size_t b = i * 255.0 / (width / line_spacing);
+        size_t g = 255.0 - i * 255.0 / (width / line_spacing);
+        cv::line(image_1, cv::Point(x, 0), cv::Point(x, height - 1), cv::Scalar(b, g, 0), 2);
+    }
+    for (size_t i = 0; i <= height / line_spacing; i++) {
+        size_t y = i * line_spacing + (height % line_spacing) / 2;
+        size_t b = i * 255.0 / (width / line_spacing);
+        size_t g = 255.0 - i * 255.0 / (width / line_spacing);
+        cv::line(image_1, cv::Point(0, y), cv::Point(width - 1, y), cv::Scalar(b, g, 0), 2);
+    }
+
+    cv::warpAffine(image_1, image_1, rotation_matrix, image_1.size());
+    cv::warpAffine(image_1, image_2, translation_mat, image_1.size());
+
+    // cv::Mat img_test_disp;
+    // cv::hconcat(image_1, image_2, img_test_disp);
+    // cv::imshow("Test", img_test_disp);
+    // cv::waitKey(1000);
+
+    Frontend::ExtractorType extractor_types[2] = {Frontend::ExtractorType::SIFT,
+                                                  Frontend::ExtractorType::ORB};
+    Frontend::MatcherType matcher_types[3] = {Frontend::MatcherType::BRUTE_FORCE,
+                                              Frontend::MatcherType::FLANN,
+                                              Frontend::MatcherType::KNN};
+
+    Frontend frontend;
+    std::pair<std::vector<cv::KeyPoint>, cv::Mat> keypoints_descriptors_pair_1;
+    std::pair<std::vector<cv::KeyPoint>, cv::Mat> keypoints_descriptors_pair_2;
+    std::vector<cv::DMatch> matches;
+    cv::Mat img_keypoints_out_1(height, width, CV_8UC3),
+        img_keypoints_out_2(height, width, CV_8UC3), img_matches_out(height, 2 * width, CV_8UC3);
+    // cv::Mat img_display_test;
+    for (Frontend::ExtractorType extractor_type : extractor_types) {
+        for (Frontend::MatcherType matcher_type : matcher_types) {
+            printf("started frontend combination: (%d, %d)\n", static_cast<int>(extractor_type),
+                   static_cast<int>(matcher_type));
+            try {
+                frontend = Frontend(extractor_type, matcher_type);
+            } catch (const std::invalid_argument& e) {
+                assert(std::string(e.what()) == "FLANN can not be used with ORB.");  // very jank...
+                continue;
+            }
+            keypoints_descriptors_pair_1 = frontend.get_keypoints_and_descriptors(image_1);
+            keypoints_descriptors_pair_2 = frontend.get_keypoints_and_descriptors(image_2);
+            matches = frontend.get_matches(keypoints_descriptors_pair_1.second,
+                                           keypoints_descriptors_pair_2.second);
+            frontend.draw_keypoints(image_1, keypoints_descriptors_pair_1.first,
+                                    img_keypoints_out_1);
+            frontend.draw_keypoints(image_2, keypoints_descriptors_pair_2.first,
+                                    img_keypoints_out_2);
+            frontend.draw_matches(image_1, keypoints_descriptors_pair_1.first, image_2,
+                                  keypoints_descriptors_pair_2.first, matches, img_matches_out);
+            // cv::hconcat(img_keypoints_out_1, img_keypoints_out_2, img_display_test);
+            // cv::vconcat(img_display_test, img_matches_out, img_display_test);
+            // std::stringstream text;
+            // text << "Extractor " << static_cast<int>(extractor_type) << ", matcher "
+            //      << static_cast<int>(matcher_type);
+            // cv::putText(img_display_test, text.str(), cv::Point(20, height - 50),
+            //             cv::FONT_HERSHEY_SIMPLEX, 1, cv::Scalar(0, 0, 255), 2, cv::LINE_AA);
+            // cv::imshow("Keypoints and Matches Output.", img_display_test);
+            // std::cout << "Press spacebar to pause." << std::endl;
+            // while (cv::waitKey(1000) == 32) {
+            // }
+            printf("completed frontend combination: (%d, %d)\n", static_cast<int>(extractor_type),
+                   static_cast<int>(matcher_type));
+            if (extractor_type != Frontend::ExtractorType::ORB) {  // don't check ORB for now
+                for (const cv::DMatch match : matches) {
+                    EXPECT_NEAR(keypoints_descriptors_pair_1.first[match.queryIdx].pt.x -
+                                    keypoints_descriptors_pair_2.first[match.trainIdx].pt.x,
+                                pixel_shift_x, pixel_shift_x + PIXEL_COMP_TOL);
+                    EXPECT_NEAR(keypoints_descriptors_pair_2.first[match.trainIdx].pt.y -
+                                    keypoints_descriptors_pair_1.first[match.queryIdx].pt.y,
+                                0, PIXEL_COMP_TOL);
+                }
+            }
+        }
+    }
+}
+}  // namespace robot::experimental::learn_descriptors
diff --git a/third_party/BUILD.cpp_fast_csv_parser b/third_party/BUILD.cpp_fast_csv_parser
new file mode 100644
index 00000000..487303e4
--- /dev/null
+++ b/third_party/BUILD.cpp_fast_csv_parser
@@ -0,0 +1,6 @@
+cc_library(
+    name = "cpp_fast_csv_parser",
+    srcs = ["csv.h"],
+    hdrs = ["csv.h"],
+    visibility = ["//visibility:public"],
+)
\ No newline at end of file
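
Reviewer note, not part of the patch: a minimal sketch of how the new Frontend API could be driven outside of gtest, mirroring what visual_odometry_test.cc does. The file name example_main.cc, the image paths, and the pre-sized output Mat are hypothetical; only the Frontend calls come from this patch.

// example_main.cc -- hypothetical usage sketch, not included in this patch.
#include <algorithm>
#include <cstdio>
#include <vector>

#include <opencv2/opencv.hpp>

#include "experimental/learn_descriptors/visual_odometry.hh"

using robot::experimental::learn_descriptors::Frontend;

int main() {
    // SIFT keypoints matched with the ratio-test KNN matcher added in this patch.
    Frontend frontend(Frontend::ExtractorType::SIFT, Frontend::MatcherType::KNN);

    // Placeholder image paths; any two overlapping frames would do.
    const cv::Mat img1 = cv::imread("frame_0.png");
    const cv::Mat img2 = cv::imread("frame_1.png");

    const auto [keypoints1, descriptors1] = frontend.get_keypoints_and_descriptors(img1);
    const auto [keypoints2, descriptors2] = frontend.get_keypoints_and_descriptors(img2);
    const std::vector<cv::DMatch> matches = frontend.get_matches(descriptors1, descriptors2);
    std::printf("%zu matches between %zu and %zu keypoints\n", matches.size(),
                keypoints1.size(), keypoints2.size());

    // draw_matches takes its output Mat by value, so pre-size it (as the test does)
    // so cv::drawMatches writes into the shared pixel data instead of reallocating.
    cv::Mat img_matches(std::max(img1.rows, img2.rows), img1.cols + img2.cols, CV_8UC3);
    Frontend::draw_matches(img1, keypoints1, img2, keypoints2, matches, img_matches);
    cv::imwrite("matches.png", img_matches);
    return 0;
}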