Skip to content

Commit

Permalink
Nbuono/vo (#332)
Browse files Browse the repository at this point in the history
Co-authored-by: Nicolaniello Buono <[email protected]>
  • Loading branch information
ewfuentes and PizzaRoll04 authored Dec 3, 2024
1 parent f6fbd05 commit b6a9feb
Show file tree
Hide file tree
Showing 6 changed files with 312 additions and 1 deletion.
1 change: 0 additions & 1 deletion WORKSPACE
Original file line number Diff line number Diff line change
Expand Up @@ -245,7 +245,6 @@ http_archive(
],
patch_args=["-p1"],
)

load("@com_google_protobuf//:protobuf_deps.bzl", "protobuf_deps")
protobuf_deps()

Expand Down
18 changes: 18 additions & 0 deletions experimental/learn_descriptors/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,15 @@ cc_test(
]
)

cc_library(
    name = "visual_odometry",
    srcs = ["visual_odometry.cc"],
    hdrs = ["visual_odometry.hh"],
    visibility = ["//visibility:public"],
    deps = [
        "@opencv//:opencv",
    ],
)

cc_library(
name = "symphony_lake_parser",
Expand All @@ -29,6 +38,15 @@ cc_library(
]
)

cc_test(
    name = "visual_odometry_test",
    srcs = ["visual_odometry_test.cc"],
    deps = [
        ":visual_odometry",
        "@com_google_googletest//:gtest_main",
    ],
)

cc_test(
name = "symphony_lake_parser_test",
srcs = ["symphony_lake_parser_test.cc"],
Expand Down
118 changes: 118 additions & 0 deletions experimental/learn_descriptors/visual_odometry.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
#include "experimental/learn_descriptors/visual_odometry.hh"

namespace robot::experimental::learn_descriptors {
// Builds the VO pipeline's frontend with the requested extractor/matcher pair.
// Uses the member-initializer list so frontend_ is constructed once instead of
// being default-constructed and then assigned.
VisualOdometry::VisualOdometry(Frontend::ExtractorType frontend_extractor,
                               Frontend::MatcherType frontend_matcher)
    : frontend_(frontend_extractor, frontend_matcher) {}

// Constructs a Frontend with the requested feature extractor and descriptor
// matcher. Throws std::invalid_argument for unsupported combinations
// (FLANN + ORB) and for unrecognized enum values.
Frontend::Frontend(ExtractorType frontend_algorithm, MatcherType frontend_matcher) {
    extractor_type_ = frontend_algorithm;
    matcher_type_ = frontend_matcher;

    switch (extractor_type_) {
        case ExtractorType::SIFT:
            feature_extractor_ = cv::SIFT::create();
            break;
        case ExtractorType::ORB:
            feature_extractor_ = cv::ORB::create();
            break;
        default:
            // Fail loudly instead of leaving feature_extractor_ null.
            throw std::invalid_argument("Unsupported extractor type.");
    }
    // ORB produces binary descriptors, which must be compared with Hamming
    // distance; SIFT descriptors are floating point and use L2 distance.
    const cv::NormTypes norm_type =
        extractor_type_ == ExtractorType::ORB ? cv::NORM_HAMMING : cv::NORM_L2;
    switch (matcher_type_) {
        case MatcherType::BRUTE_FORCE:
        case MatcherType::KNN:
            // Both strategies use a brute-force matcher; they differ only in
            // how matches are queried (match vs. knnMatch, see get_matches).
            descriptor_matcher_ = cv::BFMatcher::create(norm_type);
            break;
        case MatcherType::FLANN:
            if (frontend_algorithm == ExtractorType::ORB) {
                throw std::invalid_argument("FLANN can not be used with ORB.");
            }
            descriptor_matcher_ = cv::FlannBasedMatcher::create();
            break;
        default:
            // Fail loudly instead of leaving descriptor_matcher_ null.
            throw std::invalid_argument("Unsupported matcher type.");
    }
}

// Detects keypoints in img and computes their descriptors with the configured
// extractor. Every supported OpenCV extractor shares the detectAndCompute
// interface, so no per-type dispatch is required here.
std::pair<std::vector<cv::KeyPoint>, cv::Mat> Frontend::get_keypoints_and_descriptors(
    const cv::Mat &img) const {
    std::vector<cv::KeyPoint> detected_keypoints;
    cv::Mat computed_descriptors;
    feature_extractor_->detectAndCompute(img, cv::noArray(), detected_keypoints,
                                         computed_descriptors);
    return {detected_keypoints, computed_descriptors};
}

// Matches two descriptor sets using the configured matching strategy and
// returns the matches sorted by ascending match distance.
std::vector<cv::DMatch> Frontend::get_matches(const cv::Mat &descriptors1,
                                              const cv::Mat &descriptors2) const {
    std::vector<cv::DMatch> result;
    if (matcher_type_ == MatcherType::BRUTE_FORCE) {
        get_brute_matches(descriptors1, descriptors2, result);
    } else if (matcher_type_ == MatcherType::KNN) {
        get_KNN_matches(descriptors1, descriptors2, result);
    } else if (matcher_type_ == MatcherType::FLANN) {
        get_FLANN_matches(descriptors1, descriptors2, result);
    }
    std::sort(result.begin(), result.end());
    return result;
}

// Runs a plain one-to-one brute-force match into matches_out. Returns false
// (leaving matches_out untouched) when this frontend is not configured for
// BRUTE_FORCE matching.
bool Frontend::get_brute_matches(const cv::Mat &descriptors1, const cv::Mat &descriptors2,
                                 std::vector<cv::DMatch> &matches_out) const {
    const bool configured_for_brute_force = matcher_type_ == MatcherType::BRUTE_FORCE;
    if (!configured_for_brute_force) {
        return false;
    }
    matches_out.clear();
    descriptor_matcher_->match(descriptors1, descriptors2, matches_out);
    return true;
}

// Runs a 2-nearest-neighbor match and keeps only matches that pass the ratio
// test (best distance < 0.7 * second-best). Returns false (leaving matches_out
// untouched) when this frontend is not configured for KNN matching.
bool Frontend::get_KNN_matches(const cv::Mat &descriptors1, const cv::Mat &descriptors2,
                               std::vector<cv::DMatch> &matches_out) const {
    if (matcher_type_ != MatcherType::KNN) {
        return false;
    }
    std::vector<std::vector<cv::DMatch>> knn_matches;
    descriptor_matcher_->knnMatch(descriptors1, descriptors2, knn_matches, 2);
    const float ratio_thresh = 0.7f;
    matches_out.clear();
    for (const auto &candidates : knn_matches) {
        // knnMatch can return fewer than 2 neighbors (e.g. descriptors2 has a
        // single row); indexing [1] unconditionally would read out of bounds.
        if (candidates.size() < 2) {
            continue;
        }
        if (candidates[0].distance < ratio_thresh * candidates[1].distance) {
            matches_out.push_back(candidates[0]);
        }
    }
    return true;
}

// Runs a FLANN-based 2-nearest-neighbor match filtered by the same ratio test
// as get_KNN_matches. Returns false (leaving matches_out untouched) when this
// frontend is not configured for FLANN matching.
bool Frontend::get_FLANN_matches(const cv::Mat &descriptors1, const cv::Mat &descriptors2,
                                 std::vector<cv::DMatch> &matches_out) const {
    if (matcher_type_ != MatcherType::FLANN) {
        return false;
    }
    std::vector<std::vector<cv::DMatch>> knn_matches;
    descriptor_matcher_->knnMatch(descriptors1, descriptors2, knn_matches, 2);
    const float ratio_thresh = 0.7f;
    matches_out.clear();
    for (const auto &candidates : knn_matches) {
        // knnMatch can return fewer than 2 neighbors (e.g. descriptors2 has a
        // single row); indexing [1] unconditionally would read out of bounds.
        if (candidates.size() < 2) {
            continue;
        }
        if (candidates[0].distance < ratio_thresh * candidates[1].distance) {
            matches_out.push_back(candidates[0]);
        }
    }
    return true;
}
} // namespace robot::experimental::learn_descriptors
58 changes: 58 additions & 0 deletions experimental/learn_descriptors/visual_odometry.hh
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
#pragma once

#include <opencv2/opencv.hpp>

namespace robot::experimental::learn_descriptors {
// Feature extraction and matching stage of the visual odometry pipeline.
// Owns an OpenCV feature extractor (SIFT or ORB) and a descriptor matcher
// (brute force, KNN with ratio test, or FLANN).
class Frontend {
   public:
    enum class ExtractorType { SIFT, ORB };
    enum class MatcherType { BRUTE_FORCE, KNN, FLANN };

    Frontend() = default;
    // Throws std::invalid_argument for unsupported combinations (FLANN + ORB).
    Frontend(ExtractorType frontend_extractor, MatcherType frontend_matcher);
    ~Frontend() = default;

    ExtractorType get_extractor_type() const { return extractor_type_; }
    MatcherType get_matcher_type() const { return matcher_type_; }

    // Detects keypoints in img and computes their descriptors.
    std::pair<std::vector<cv::KeyPoint>, cv::Mat> get_keypoints_and_descriptors(
        const cv::Mat &img) const;
    // Matches two descriptor sets with the configured strategy; the result is
    // sorted by ascending match distance.
    std::vector<cv::DMatch> get_matches(const cv::Mat &descriptors1,
                                        const cv::Mat &descriptors2) const;

    // Renders keypoints over img into img_keypoints_out. The output Mat is
    // taken by reference: drawKeypoints may reallocate it, and a by-value
    // parameter would silently drop the result for the caller. The keypoint
    // vector is taken by const reference to avoid a per-call copy.
    static void draw_keypoints(const cv::Mat &img, const std::vector<cv::KeyPoint> &keypoints,
                               cv::Mat &img_keypoints_out) {
        cv::drawKeypoints(img, keypoints, img_keypoints_out, cv::Scalar::all(-1),
                          cv::DrawMatchesFlags::DRAW_RICH_KEYPOINTS);
    }
    // Renders the matched keypoint pairs of the two images side by side into
    // img_matches_out (same by-reference rationale as draw_keypoints).
    static void draw_matches(const cv::Mat &img1, const std::vector<cv::KeyPoint> &keypoints1,
                             const cv::Mat &img2, const std::vector<cv::KeyPoint> &keypoints2,
                             const std::vector<cv::DMatch> &matches, cv::Mat &img_matches_out) {
        cv::drawMatches(img1, keypoints1, img2, keypoints2, matches, img_matches_out);
    }

   private:
    // Each helper returns false without touching matches_out when this
    // frontend is not configured for that matching strategy.
    bool get_brute_matches(const cv::Mat &descriptors1, const cv::Mat &descriptors2,
                           std::vector<cv::DMatch> &matches_out) const;
    bool get_KNN_matches(const cv::Mat &descriptors1, const cv::Mat &descriptors2,
                         std::vector<cv::DMatch> &matches_out) const;
    bool get_FLANN_matches(const cv::Mat &descriptors1, const cv::Mat &descriptors2,
                           std::vector<cv::DMatch> &matches_out) const;

    ExtractorType extractor_type_;
    MatcherType matcher_type_;

    cv::Ptr<cv::Feature2D> feature_extractor_;
    cv::Ptr<cv::DescriptorMatcher> descriptor_matcher_;
};
// Top-level visual odometry pipeline object: holds the feature frontend and
// the previously seen image.
class VisualOdometry {
   public:
    // KNN is the default matcher (it applies ratio-test filtering of matches).
    VisualOdometry(Frontend::ExtractorType frontend_extractor,
                   Frontend::MatcherType frontend_matcher = Frontend::MatcherType::KNN);
    ~VisualOdometry() = default;

   private:
    // Previous frame, kept for frame-to-frame matching.
    cv::Mat prev_image_;

    Frontend frontend_;
};
} // namespace robot::experimental::learn_descriptors
112 changes: 112 additions & 0 deletions experimental/learn_descriptors/visual_odometry_test.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
#include "experimental/learn_descriptors/visual_odometry.hh"

#include <iostream>
#include <sstream>

#include "gtest/gtest.h"

namespace robot::experimental::learn_descriptors {
// Sweeps every extractor/matcher combination over a synthetic image pair
// (image_2 is image_1 shifted right by pixel_shift_x) and checks that the
// reported matches agree with the known shift.
TEST(VIO_TEST, frontend_pipeline_sweep) {
    const size_t width = 640;
    const size_t height = 480;

    const size_t pixel_shift_x = 20;
    const size_t PIXEL_COMP_TOL = 20;

    cv::Mat image_1 = cv::Mat::zeros(height, width, CV_8UC3);
    cv::Mat image_2;

    // Pure horizontal translation by pixel_shift_x.
    cv::Mat translation_mat = (cv::Mat_<double>(2, 3) << 1, 0, pixel_shift_x, 0, 1, 0);
    cv::Point rect_points[4] = {{150, 200}, {350, 200}, {350, 300}, {150, 300}};
    // Rotate about the rectangle's center of mass so the pattern stays in frame.
    float com_x = 0.0f, com_y = 0.0f;
    for (const cv::Point& rect_point : rect_points) {
        com_x += rect_point.x;
        com_y += rect_point.y;
    }
    com_x *= 0.25;
    com_y *= 0.25;
    cv::Point rotation_center(com_x, com_y);
    cv::Mat rotation_matrix = cv::getRotationMatrix2D(rotation_center, 45, 1.0);

    // Draw a grid of colored lines so the extractors have texture to latch onto.
    const size_t line_spacing = 100;
    for (size_t i = 0; i <= width / line_spacing; i++) {
        size_t x = i * line_spacing + (width % line_spacing) / 2;
        size_t b = i * 255.0 / (width / line_spacing);
        size_t g = 255.0 - i * 255.0 / (width / line_spacing);
        cv::line(image_1, cv::Point(x, 0), cv::Point(x, height - 1), cv::Scalar(b, g, 0), 2);
    }
    for (size_t i = 0; i <= height / line_spacing; i++) {
        size_t y = i * line_spacing + (height % line_spacing) / 2;
        size_t b = i * 255.0 / (width / line_spacing);
        size_t g = 255.0 - i * 255.0 / (width / line_spacing);
        cv::line(image_1, cv::Point(0, y), cv::Point(width - 1, y), cv::Scalar(b, g, 0), 2);
    }

    cv::warpAffine(image_1, image_1, rotation_matrix, image_1.size());
    cv::warpAffine(image_1, image_2, translation_mat, image_1.size());

    Frontend::ExtractorType extractor_types[2] = {Frontend::ExtractorType::SIFT,
                                                  Frontend::ExtractorType::ORB};
    Frontend::MatcherType matcher_types[3] = {Frontend::MatcherType::BRUTE_FORCE,
                                              Frontend::MatcherType::FLANN,
                                              Frontend::MatcherType::KNN};

    Frontend frontend;
    std::pair<std::vector<cv::KeyPoint>, cv::Mat> keypoints_descriptors_pair_1;
    std::pair<std::vector<cv::KeyPoint>, cv::Mat> keypoints_descriptors_pair_2;
    std::vector<cv::DMatch> matches;
    cv::Mat img_keypoints_out_1(height, width, CV_8UC3),
        img_keypoints_out_2(height, width, CV_8UC3), img_matches_out(height, 2 * width, CV_8UC3);
    for (Frontend::ExtractorType extractor_type : extractor_types) {
        for (Frontend::MatcherType matcher_type : matcher_types) {
            printf("started frontend combination: (%d, %d)\n", static_cast<int>(extractor_type),
                   static_cast<int>(matcher_type));
            try {
                frontend = Frontend(extractor_type, matcher_type);
            } catch (const std::invalid_argument& e) {
                // Only FLANN + ORB is expected to be rejected. Use gtest
                // expectations rather than assert(): assert() is a no-op
                // under NDEBUG and aborts instead of reporting a failure.
                EXPECT_EQ(extractor_type, Frontend::ExtractorType::ORB);
                EXPECT_EQ(matcher_type, Frontend::MatcherType::FLANN);
                EXPECT_STREQ(e.what(), "FLANN can not be used with ORB.");
                continue;
            }
            keypoints_descriptors_pair_1 = frontend.get_keypoints_and_descriptors(image_1);
            keypoints_descriptors_pair_2 = frontend.get_keypoints_and_descriptors(image_2);
            matches = frontend.get_matches(keypoints_descriptors_pair_1.second,
                                           keypoints_descriptors_pair_2.second);
            frontend.draw_keypoints(image_1, keypoints_descriptors_pair_1.first,
                                    img_keypoints_out_1);
            frontend.draw_keypoints(image_2, keypoints_descriptors_pair_2.first,
                                    img_keypoints_out_2);
            frontend.draw_matches(image_1, keypoints_descriptors_pair_1.first, image_2,
                                  keypoints_descriptors_pair_2.first, matches, img_matches_out);
            printf("completed frontend combination: (%d, %d)\n", static_cast<int>(extractor_type),
                   static_cast<int>(matcher_type));
            if (extractor_type != Frontend::ExtractorType::ORB) {  // don't check ORB for now
                for (const cv::DMatch& match : matches) {  // const ref: avoid per-match copy
                    // Matched points should differ by the applied shift in x
                    // and (approximately) not at all in y.
                    EXPECT_NEAR(keypoints_descriptors_pair_1.first[match.queryIdx].pt.x -
                                    keypoints_descriptors_pair_2.first[match.trainIdx].pt.x,
                                pixel_shift_x, pixel_shift_x + PIXEL_COMP_TOL);
                    EXPECT_NEAR(keypoints_descriptors_pair_2.first[match.trainIdx].pt.y -
                                    keypoints_descriptors_pair_1.first[match.queryIdx].pt.y,
                                0, PIXEL_COMP_TOL);
                }
            }
        }
    }
}
} // namespace robot::experimental::learn_descriptors
6 changes: 6 additions & 0 deletions third_party/BUILD.cpp_fast_csv_parser
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# fast-cpp-csv-parser is a header-only library; expose csv.h as a public
# header. (The previous hdrs entry "csh.h" was a typo for "csv.h" and pointed
# at a nonexistent file, and the header was redundantly listed in srcs.)
cc_library(
    name = "cpp_fast_csv_parser",
    hdrs = ["csv.h"],
    visibility = ["//visibility:public"],
)

0 comments on commit b6a9feb

Please sign in to comment.