Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Benchmark Compression: Comply with testing conventions for 3DGS compression survey #517

Open
wants to merge 6 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 6 additions & 12 deletions examples/benchmarks/compression/mcmc.sh
Original file line number Diff line number Diff line change
Expand Up @@ -21,24 +21,18 @@ CAP_MAX=1000000

for SCENE in $SCENE_LIST;
do
if [ "$SCENE" = "bonsai" ] || [ "$SCENE" = "counter" ] || [ "$SCENE" = "kitchen" ] || [ "$SCENE" = "room" ]; then
DATA_FACTOR=2
else
DATA_FACTOR=4
fi

echo "Running $SCENE"

# train without eval
CUDA_VISIBLE_DEVICES=0 python simple_trainer.py mcmc --eval_steps -1 --disable_viewer --data_factor $DATA_FACTOR \
CUDA_VISIBLE_DEVICES=0 python simple_trainer.py mcmc --eval_steps -1 --disable_viewer --data_factor 1 \
--strategy.cap-max $CAP_MAX \
--data_dir data/360_v2/$SCENE/ \
--data_dir $SCENE_DIR/$SCENE/ \
--result_dir $RESULT_DIR/$SCENE/

# eval: use vgg for lpips to align with other benchmarks
CUDA_VISIBLE_DEVICES=0 python simple_trainer.py mcmc --disable_viewer --data_factor $DATA_FACTOR \
CUDA_VISIBLE_DEVICES=0 python simple_trainer.py mcmc --disable_viewer --data_factor 1 \
--strategy.cap-max $CAP_MAX \
--data_dir data/360_v2/$SCENE/ \
--data_dir $SCENE_DIR/$SCENE/ \
--result_dir $RESULT_DIR/$SCENE/ \
--lpips_net vgg \
--compression png \
Expand All @@ -49,7 +43,7 @@ done
if command -v zip &> /dev/null
then
echo "Zipping results"
python benchmarks/compression/summarize_stats.py --results_dir $RESULT_DIR
python benchmarks/compression/summarize_stats.py --results_dir $RESULT_DIR --scenes $SCENE_LIST
else
echo "zip command not found, skipping zipping"
fi
fi
48 changes: 48 additions & 0 deletions examples/benchmarks/compression/mcmc_db.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
SCENE_DIR="data/db"
# Deep Blending benchmark uses exactly these two scenes.
SCENE_LIST="playroom drjohnson"

# Select exactly one RESULT_DIR / CAP_MAX pair below.
# CAP_MAX bounds the number of Gaussians the MCMC strategy may allocate.

# # 0.36M GSs
# RESULT_DIR="results/benchmark_db_mcmc_0_36M_png_compression"
# CAP_MAX=360000

# # 0.49M GSs
# RESULT_DIR="results/benchmark_db_mcmc_0_49M_png_compression"
# CAP_MAX=490000

# 1M GSs
RESULT_DIR="results/benchmark_db_mcmc_1M_png_compression"
CAP_MAX=1000000

# # 4M GSs
# RESULT_DIR="results/benchmark_db_mcmc_4M_png_compression"
# CAP_MAX=4000000

for SCENE in $SCENE_LIST;
do
    echo "Running $SCENE"

    # train without eval (eval_steps -1 disables intermediate evaluation)
    CUDA_VISIBLE_DEVICES=0 python simple_trainer.py mcmc --eval_steps -1 --disable_viewer --data_factor 1 \
        --strategy.cap-max $CAP_MAX \
        --data_dir $SCENE_DIR/$SCENE/ \
        --result_dir $RESULT_DIR/$SCENE/

    # eval: use vgg for lpips to align with other benchmarks
    CUDA_VISIBLE_DEVICES=0 python simple_trainer.py mcmc --disable_viewer --data_factor 1 \
        --strategy.cap-max $CAP_MAX \
        --data_dir $SCENE_DIR/$SCENE/ \
        --result_dir $RESULT_DIR/$SCENE/ \
        --lpips_net vgg \
        --compression png \
        --ckpt $RESULT_DIR/$SCENE/ckpts/ckpt_29999_rank0.pt
done

# Zip the compressed files and summarize the stats
if command -v zip &> /dev/null
then
    echo "Zipping results"
    # NOTE: pass --scenes exactly once (the flag was accidentally duplicated before)
    python benchmarks/compression/summarize_stats.py --results_dir $RESULT_DIR --scenes $SCENE_LIST
else
    echo "zip command not found, skipping zipping"
fi
4 changes: 2 additions & 2 deletions examples/benchmarks/compression/mcmc_tt.sh
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ done
if command -v zip &> /dev/null
then
echo "Zipping results"
python benchmarks/compression/summarize_stats.py --results_dir $RESULT_DIR --scenes $SCENE_LIST
python benchmarks/compression/summarize_stats.py --results_dir $RESULT_DIR --scenes $SCENE_LIST
else
echo "zip command not found, skipping zipping"
fi
fi
5 changes: 5 additions & 0 deletions examples/benchmarks/compression/results/DeepBlending.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
Submethod,PSNR,SSIM,LPIPS,Size [Bytes],#Gaussians
,29.642436981201172,0.9031961858272552,0.27010108530521393,6793344.0,360000
,29.868386268615723,0.9065537750720978,0.2643580883741379,8671012.0,490000
-1.00M,29.534425735473633,0.9069415032863617,0.2537623792886734,15839839.0,1000000
,29.610159873962402,0.9111461937427521,0.2394031211733818,57270424.5,4000000
9 changes: 5 additions & 4 deletions examples/benchmarks/compression/results/MipNeRF360.csv
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
Submethod,PSNR,SSIM,LPIPS,Size [Bytes],#Gaussians
,26.64,0.788,0.270,6916294,360000
,26.88,0.796,0.256,8796870,490000
-1.00M,27.29,0.811,0.229,16038022,1000000
,27.70,0.825,0.197,57812682,4000000
,26.332908206515842,0.770623869366116,0.28591954542530906,6863895.666666667,360000
,26.436987982855904,0.7794780664973788,0.2721194707685047,8737225.333333334,490000
-1.00M,26.67247136433919,0.7951412995656332,0.24599307609928978,15874812.444444444,1000000
,26.77038065592448,0.8091744118266635,0.21410229057073593,56920032.55555555,4000000

8 changes: 4 additions & 4 deletions examples/benchmarks/compression/results/TanksAndTemples.csv
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
Submethod,PSNR,SSIM,LPIPS,Size [Bytes],#Gaussians
,23.54,0.838,0.200,6875669,360000
,23.62,0.845,0.188,8728572,490000
-1.00M,24.03,0.857,0.163,16100628,1000000
,24.47,0.872,0.132,58239022,4000000
,23.484140396118164,0.8359003365039825,0.20022188872098923,6814856.5,360000
,23.68420124053955,0.8424293696880341,0.18749213218688965,8710374.5,490000
-1.00M,23.996936798095703,0.855468362569809,0.16304801404476166,16065561.5,1000000
,24.45703887939453,0.8690102994441986,0.13164417818188667,58291533.5,4000000
44 changes: 42 additions & 2 deletions examples/datasets/colmap.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

import cv2
import imageio.v2 as imageio
from PIL import Image
import numpy as np
import torch
from pycolmap import SceneManager
Expand Down Expand Up @@ -35,6 +36,7 @@ def __init__(
factor: int = 1,
normalize: bool = False,
test_every: int = 8,
test_max_res: int = 1600, # max image side length in pixel for test split
):
self.data_dir = data_dir
self.factor = factor
Expand All @@ -58,8 +60,12 @@ def __init__(
w2c_mats = []
camera_ids = []
Ks_dict = dict()
Ks_dict_test = dict()
params_dict = dict()
imsize_dict = dict() # width, height
imsize_dict_test = (
dict()
) # width, height for test images -> max resolution limited
mask_dict = dict()
bottom = np.array([0, 0, 0, 1]).reshape(1, 4)
for k in imdata:
Expand Down Expand Up @@ -201,8 +207,10 @@ def __init__(
self.camtoworlds = camtoworlds # np.ndarray, (num_images, 4, 4)
self.camera_ids = camera_ids # List[int], (num_images,)
self.Ks_dict = Ks_dict # Dict of camera_id -> K
self.Ks_dict_test = Ks_dict_test
self.params_dict = params_dict # Dict of camera_id -> params
self.imsize_dict = imsize_dict # Dict of camera_id -> (width, height)
self.imsize_dict_test = imsize_dict_test
self.mask_dict = mask_dict # Dict of camera_id -> mask
self.points = points # np.ndarray, (num_points, 3)
self.points_err = points_err # np.ndarray, (num_points,)
Expand All @@ -214,6 +222,14 @@ def __init__(
# intrinsics stored in COLMAP corresponds to 2x upsampled images.
actual_image = imageio.imread(self.image_paths[0])[..., :3]
actual_height, actual_width = actual_image.shape[:2]

# need to check image resolution. create separate K for test set
max_side = max(actual_width, actual_height)
scale_test = 1
if max_side > test_max_res:
global_down_test = max_side / test_max_res
scale_test = float(global_down_test)

colmap_width, colmap_height = self.imsize_dict[self.camera_ids[0]]
s_height, s_width = actual_height / colmap_height, actual_width / colmap_width
for camera_id, K in self.Ks_dict.items():
Expand All @@ -223,6 +239,15 @@ def __init__(
width, height = self.imsize_dict[camera_id]
self.imsize_dict[camera_id] = (int(width * s_width), int(height * s_height))

K_test = K.copy()
K_test[0, :] /= scale_test
K_test[1, :] /= scale_test
self.Ks_dict_test[camera_id] = K_test
self.imsize_dict_test[camera_id] = (
int(width * s_width / scale_test),
int(height * s_height / scale_test),
)

# undistortion
self.mapx_dict = dict()
self.mapy_dict = dict()
Expand Down Expand Up @@ -324,14 +349,29 @@ def __len__(self):

def __getitem__(self, item: int) -> Dict[str, Any]:
index = self.indices[item]
image = imageio.imread(self.parser.image_paths[index])[..., :3]
image = Image.open(self.parser.image_paths[index])
camera_id = self.parser.camera_ids[index]
K = self.parser.Ks_dict[camera_id].copy() # undistorted K
params = self.parser.params_dict[camera_id]
camtoworlds = self.parser.camtoworlds[index]
mask = self.parser.mask_dict[camera_id]

# use K with downscaled resolution for test split
if self.split == "train":
K = self.parser.Ks_dict[camera_id].copy() # undistorted K
else:
K = self.parser.Ks_dict_test[camera_id].copy()

# downscale test image
if self.split != "train":
resized_image_PIL = image.resize(self.parser.imsize_dict_test[camera_id])
image = resized_image_PIL
image = np.array(image)
image = image[..., :3]

if len(params) > 0:
# evaluation does not support distorted images
if self.split != "train":
assert False, "only undistorted datasets are handled for evaluation"
# Images are distorted. Undistort them.
mapx, mapy = (
self.parser.mapx_dict[camera_id],
Expand Down