added option to select lightglue or roma preselection

3DOM-FBK · Apr 11, 2024 · c227edd · c227edd
1 parent 3adec4e
commit c227edd
Show file tree

Hide file tree

Showing 7 changed files with 495 additions and 503 deletions.
diff --git a/belv_sp+lg.ipynb b/belv_sp+lg.ipynb
diff --git a/georeference_by_gcp.ipynb b/georeference_by_gcp.ipynb
diff --git a/src/deep_image_matching/__init__.py b/src/deep_image_matching/__init__.py
@@ -1,6 +1,11 @@
 __version__ = "1.1.1"
 
 import logging
+from time import time
+from collections import OrderedDict
+
+time_dict = OrderedDict()
+time_dict["start"] = time()
 
 # Check if pycolmap is installed
 try:
@@ -14,16 +19,29 @@
     NO_PYCOLMAP = True
 
 # Import submodules
-from . import io
-from . import utils
-from . import reconstruction
 from . import extractors
+
+time_dict["extractors"] = time() - time_dict["start"]  # seconds elapsed since time_dict["start"]
 from . import matchers
+
+time_dict["matchers"] = time() - time_dict["start"]  # measured from "start": earlier entries are durations, not timestamps
+
+from . import reconstruction
+
+time_dict["reconstruction"] = time() - time_dict["start"]
+
+from . import io
+from . import utils
 from . import visualization
+from . import thirdparty
+
+time_dict["aux"] = time() - time_dict["start"]
 
 if not NO_PYCOLMAP:
     from . import triangulation  # the triangulation module strictly requires pycolmap
 
+    time_dict["triangulation"] = time() - time_dict["start"]
+
 try:
     from . import graph
 except ImportError:
@@ -33,7 +51,12 @@
 from .parser import parse_cli
 
 # Import classes and variables
-from .constants import *
 from .image_matching import ImageMatcher
-from .config import Config
 from .pairs_generator import PairsGenerator
+from .constants import *
+from .config import Config
+
+print("Deep Image Matching loaded in {:.3f} seconds.".format(time() - time_dict["start"]))
+# print("Time breakdown:")
+# for key in time_dict:
+#     print(f"{key}: {time_dict[key]:.3f}")
diff --git a/src/deep_image_matching/constants.py b/src/deep_image_matching/constants.py
@@ -1,3 +1,4 @@
+import inspect
 from enum import Enum
 from typing import Tuple
 
@@ -7,6 +8,34 @@
 timer = Timer(logger=logger)
 
 
+# def get_extractor_classes(root):
+#     classes = inspect.getmembers(root, inspect.isclass)
+#     classes = [c[0] for c in classes if issubclass(c[1], root.ExtractorBase)]
+#     return classes
+
+
+# def get_matcher_classes(root):
+#     classes = inspect.getmembers(root, inspect.isclass)
+#     classes = [c[0] for c in classes if issubclass(c[1], root.MatcherBase)]
+#     return classes
+
+
+class Pipeline(Enum):
+    """Enumeration for pipeline approaches; members are numbered consecutively from 0 to 10."""
+
+    SUPERPOINT_LIGHTGLUE = 0
+    SUPERPOINT_SUPERGLUE = 1
+    DISK_LIGHTGLUE = 2
+    ALIKED_LIGHTGLUE = 3
+    ORB_KORNIA_MATCHER = 4
+    SIFT_KORNIA_MATCHER = 5
+    LOFTR = 6
+    SE2LOFTR = 7
+    ROMA = 8
+    KEYNETAFFNETHARDNET_KORNIA_MATCHER = 9
+    DEDODE_KORNIA_MATCHER = 10
+
+
 class TileSelection(Enum):
     """Enumeration for tile selection methods."""
 

diff --git a/src/deep_image_matching/image_matching.py b/src/deep_image_matching/image_matching.py
@@ -66,6 +66,7 @@ class ImageMatcher:
         "fast_viz": True,
         "hide_matching_track": True,
         "do_viz_tiles": False,
+        "preselection_pipeline": "superpoint+lightglue",
     }
 
     def __init__(

diff --git a/src/deep_image_matching/matchers/matcher_base.py b/src/deep_image_matching/matchers/matcher_base.py
@@ -372,14 +372,15 @@ def _match_by_tile(
             img1,
             method=method,
             quality=self.config["general"]["quality"],
-            preselction_extractor=self._preselction_extractor,
-            preselction_matcher=self._preselction_matcher,
             tile_size=self.config["general"]["tile_size"],
             tile_overlap=self.config["general"]["tile_overlap"],
+            preselction_extractor=self._preselction_extractor,
+            preselction_matcher=self._preselction_matcher,
+            pipeline=self.config["general"]["preselection_pipeline"],
             tile_preselection_size=self.tile_preselection_size,
             min_matches_per_tile=self.min_matches_per_tile,
             device=self._device,
-            debug_dir=self.config["general"]["output_dir"] / "debug",
+            debug_dir=self.config["general"]["output_dir"] / "debug" if self.config["general"]["do_viz"] else None,
         )
         timer.update("tile selection")
 
@@ -925,10 +926,11 @@ def tile_selection(
     img1: Path,
     method: TileSelection,
     quality: Quality,
-    preselction_extractor: ExtractorBase,
-    preselction_matcher: MatcherBase,
     tile_size: Tuple[int, int],
     tile_overlap: int,
+    preselction_extractor: ExtractorBase = None,
+    preselction_matcher: MatcherBase = None,
+    pipeline: str = "superpoint+lightglue",
     tile_preselection_size: int = 1024,
     min_matches_per_tile: int = 5,
     do_geometric_verification: bool = False,
@@ -982,48 +984,60 @@ def tile_selection(
         # Match tiles by preselection running matching on downsampled images
         logger.debug("Matching tiles by downsampling preselection")
 
-        # match downsampled images with roma
-        from ..thirdparty.RoMa.roma import roma_outdoor
-
-        n_matches = 2000
-        matcher = roma_outdoor(device, coarse_res=280, upsample_res=420)
-        H_A, W_A = i0_new_size
-        H_B, W_B = i1_new_size
-        warp, certainty = matcher.match(str(img0), str(img1), device=device)
-        matches, certainty = matcher.sample(warp, certainty, num=n_matches)
-        kp0, kp1 = matcher.to_pixel_coordinates(matches, H_A, W_A, H_B, W_B)
-        kp0, kp1 = kp0.cpu().numpy(), kp1.cpu().numpy()
-
-        # # Downsampled images
-        # size0 = i0.shape[:2][::-1]
-        # size1 = i1.shape[:2][::-1]
-        # scale0 = tile_preselection_size / max(size0)
-        # scale1 = tile_preselection_size / max(size1)
-        # size0_new = tuple(int(round(x * scale0)) for x in size0)
-        # size1_new = tuple(int(round(x * scale1)) for x in size1)
-        # i0 = cv2.resize(i0, size0_new, interpolation=cv2.INTER_AREA)
-        # i1 = cv2.resize(i1, size1_new, interpolation=cv2.INTER_AREA)
-
-        # # Run SuperPoint on downsampled images
-        # with torch.inference_mode():
-        #     feats0 = preselction_extractor({"image": frame2tensor(i0, device)})
-        #     feats1 = preselction_extractor({"image": frame2tensor(i1, device)})
-
-        #     # Match features with LightGlue
-        #     feats0 = sp2lg(feats0)
-        #     feats1 = sp2lg(feats1)
-        #     res = preselction_matcher({"image0": feats0, "image1": feats1})
-        #     res = rbd2np(res)
-
-        # # Get keypoints in original image
-        # kp0 = feats0["keypoints"].cpu().numpy()[0]
-        # kp0 = kp0[res["matches"][:, 0], :]
-        # kp1 = feats1["keypoints"].cpu().numpy()[0]
-        # kp1 = kp1[res["matches"][:, 1], :]
-
-        # # Scale up keypoints
-        # kp0 = kp0 / scale0
-        # kp1 = kp1 / scale1
+        if pipeline == "superpoint+lightglue":
+            if preselction_extractor is None or preselction_matcher is None:  # explicit None test: do not rely on model truthiness
+                raise ValueError(
+                    "Preselection extractor and matcher must be provided for superpoint+lightglue pipeline"
+                )
+
+            # Downsampled images
+            size0 = i0.shape[:2][::-1]
+            size1 = i1.shape[:2][::-1]
+            scale0 = tile_preselection_size / max(size0)
+            scale1 = tile_preselection_size / max(size1)
+            size0_new = tuple(int(round(x * scale0)) for x in size0)
+            size1_new = tuple(int(round(x * scale1)) for x in size1)
+            i0 = cv2.resize(i0, size0_new, interpolation=cv2.INTER_AREA)
+            i1 = cv2.resize(i1, size1_new, interpolation=cv2.INTER_AREA)
+
+            # Run SuperPoint on downsampled images
+            with torch.inference_mode():
+                feats0 = preselction_extractor({"image": frame2tensor(i0, device)})
+                feats1 = preselction_extractor({"image": frame2tensor(i1, device)})
+
+                # Match features with LightGlue
+                feats0 = sp2lg(feats0)
+                feats1 = sp2lg(feats1)
+                res = preselction_matcher({"image0": feats0, "image1": feats1})
+                res = rbd2np(res)
+
+            # Get keypoints in original image
+            kp0 = feats0["keypoints"].cpu().numpy()[0]
+            kp0 = kp0[res["matches"][:, 0], :]
+            kp1 = feats1["keypoints"].cpu().numpy()[0]
+            kp1 = kp1[res["matches"][:, 1], :]
+
+            # Scale up keypoints
+            kp0 = kp0 / scale0
+            kp1 = kp1 / scale1
+
+        elif pipeline == "roma":
+            # match downsampled images with roma
+            from ..thirdparty.RoMa.roma import roma_outdoor
+
+            n_matches = 2000
+            matcher = roma_outdoor(device, coarse_res=280, upsample_res=420)
+            H_A, W_A = i0_new_size
+            H_B, W_B = i1_new_size
+            warp, certainty = matcher.match(str(img0), str(img1), device=device)
+            matches, certainty = matcher.sample(warp, certainty, num=n_matches)
+            kp0, kp1 = matcher.to_pixel_coordinates(matches, H_A, W_A, H_B, W_B)
+            kp0, kp1 = kp0.cpu().numpy(), kp1.cpu().numpy()
+
+        else:
+            raise ValueError(
+                f"Invalid preselection pipeline: {pipeline}. Only superpoint+lightglue and roma are supported so far"
+            )
 
         # geometric verification
         if do_geometric_verification:

diff --git a/src/deep_image_matching/matchers/roma.py b/src/deep_image_matching/matchers/roma.py
@@ -258,15 +258,17 @@ def write_tiles_disk(output_dir: Path, tiles: dict) -> None:
             img1,
             method=method,
             quality=self._quality,
-            preselction_extractor=self._preselction_extractor,
-            preselction_matcher=self._preselction_matcher,
             tile_size=tile_size,
             tile_overlap=overlap,
+            preselction_extractor=self._preselction_extractor,
+            preselction_matcher=self._preselction_matcher,
+            pipeline=self.config["general"]["preselection_pipeline"],
             tile_preselection_size=self.tile_preselection_size,
             min_matches_per_tile=self.min_matches_per_tile,
             device=self._device,
-            debug_dir=self.config["general"]["output_dir"] / "debug",
+            debug_dir=self.config["general"]["output_dir"] / "debug" if self.config["general"]["do_viz"] else None,
         )
+
         if len(tile_pairs) > self.max_tile_pairs:
             raise RuntimeError(
                 f"Too many tile pairs ({len(tile_pairs)}) to match, the matching process will be too slow and it may be inaccurate. Try to reduce the image resolution using a lower 'Quality' parameter."