From ccd9ebbf5e09fd2e142e54b5843cc1ef3f2b0375 Mon Sep 17 00:00:00 2001 From: Zhuoyang Date: Sat, 30 Sep 2023 02:18:59 -0700 Subject: [PATCH 01/10] add compute_cumulative_intersects_tensor --- diff_rast/cuda/csrc/bindings.cu | 33 +++++++++++++++++++++++++++++++++ diff_rast/cuda/csrc/bindings.h | 6 ++++++ 2 files changed, 39 insertions(+) diff --git a/diff_rast/cuda/csrc/bindings.cu b/diff_rast/cuda/csrc/bindings.cu index 429f74e25..acfc9a804 100644 --- a/diff_rast/cuda/csrc/bindings.cu +++ b/diff_rast/cuda/csrc/bindings.cu @@ -8,6 +8,8 @@ #include #include #include +#include +#include #include #include #include @@ -253,3 +255,34 @@ project_gaussians_backward_tensor( return std::make_tuple(v_cov2d, v_cov3d, v_mean3d, v_scale, v_quat); } + +torch::Tensor compute_cumulative_intersects_tensor( + const int num_points, + torch::Tensor &num_tiles_hit, + int32_t &num_intersects +) { + // ref: + // https://nvlabs.github.io/cub/structcub_1_1_device_scan.html#a9416ac1ea26f9fde669d83ddc883795a + // allocate sum workspace + CHECK_INPUT(num_tiles_hit); + void *sum_ws = nullptr; + size_t sum_ws_bytes; + torch::Tensor cum_tiles_hit = + torch::zeros({num_points}, num_tiles_hit.options().dtype(torch::kInt32)); + cub::DeviceScan::InclusiveSum( + sum_ws, sum_ws_bytes, (int32_t *)num_tiles_hit.contiguous().data_ptr(), (int32_t *)cum_tiles_hit.contiguous().data_ptr(), num_points + ); + cudaMalloc(&sum_ws, sum_ws_bytes); + cub::DeviceScan::InclusiveSum( + sum_ws, sum_ws_bytes, (int32_t *)num_tiles_hit.contiguous().data_ptr(), (int32_t *)cum_tiles_hit.contiguous().data_ptr(), num_points + ); + // cudaMemcpy( + // &num_intersects, + // &(cum_tiles_hit[num_points - 1]), + // sizeof(int32_t), + // cudaMemcpyDeviceToHost + // ); + cudaFree(sum_ws); + + return cum_tiles_hit; +} diff --git a/diff_rast/cuda/csrc/bindings.h b/diff_rast/cuda/csrc/bindings.h index 0b392fdff..09d50a650 100644 --- a/diff_rast/cuda/csrc/bindings.h +++ b/diff_rast/cuda/csrc/bindings.h @@ -79,3 +79,9 @@ project_gaussians_backward_tensor( torch::Tensor &v_xy, torch::Tensor &v_conic ); + +torch::Tensor compute_cumulative_intersects_tensor( + const int num_points, + torch::Tensor &num_tiles_hit, + int32_t &num_intersects +); \ No newline at end of file From 744838ba49764427a777cf56c036a1e7315f764c Mon Sep 17 00:00:00 2001 From: Zhuoyang Date: Sun, 1 Oct 2023 01:35:47 -0700 Subject: [PATCH 02/10] fix testing --- tests/test_cov2d_bounds.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_cov2d_bounds.py b/tests/test_cov2d_bounds.py index f1613dd40..b8f2d690e 100644 --- a/tests/test_cov2d_bounds.py +++ b/tests/test_cov2d_bounds.py @@ -6,7 +6,7 @@ @pytest.mark.skipif(not torch.cuda.is_available, reason="No CUDA device") -def compare_binding_to_pytorch(): +def test_compare_binding_to_pytorch(): from diff_rast._torch_impl import compute_cov2d_bounds as _compute_cov2d_bounds from diff_rast.cov2d_bounds import compute_cov2d_bounds @@ -29,4 +29,4 @@ def compare_binding_to_pytorch(): if __name__ == "__main__": - compare_binding_to_pytorch() + test_compare_binding_to_pytorch() From 7cb6f36040cbc8671f0216f5226067e636260827 Mon Sep 17 00:00:00 2001 From: Zhuoyang Date: Sun, 1 Oct 2023 04:11:12 -0700 Subject: [PATCH 03/10] finish cumulative check & part of mapping --- diff_rast/cuda/__init__.py | 1 + diff_rast/cuda/csrc/bindings.cu | 77 ++++++++++++++++++++++------- diff_rast/cuda/csrc/bindings.h | 20 ++++++-- diff_rast/cuda/csrc/ext.cpp | 1 + diff_rast/cuda/csrc/forward.cuh | 11 +++++ tests/test_cumulative_intersects.py | 28 +++++++++++ 6 files changed, 117 insertions(+), 21 deletions(-) create mode 100644 tests/test_cumulative_intersects.py diff --git a/diff_rast/cuda/__init__.py b/diff_rast/cuda/__init__.py index f995b41fe..adb65700c 100644 --- a/diff_rast/cuda/__init__.py +++ b/diff_rast/cuda/__init__.py @@ -18,3 +18,4 @@ def call_cuda(*args, **kwargs): project_gaussians_backward = _make_lazy_cuda_func("project_gaussians_backward") compute_sh_forward = _make_lazy_cuda_func("compute_sh_forward") compute_sh_backward = _make_lazy_cuda_func("compute_sh_backward") +compute_cumulative_intersects = _make_lazy_cuda_func("compute_cumulative_intersects") diff --git a/diff_rast/cuda/csrc/bindings.cu b/diff_rast/cuda/csrc/bindings.cu index acfc9a804..2cfb4e5a9 100644 --- a/diff_rast/cuda/csrc/bindings.cu +++ b/diff_rast/cuda/csrc/bindings.cu @@ -256,33 +256,74 @@ project_gaussians_backward_tensor( return std::make_tuple(v_cov2d, v_cov3d, v_mean3d, v_scale, v_quat); } -torch::Tensor compute_cumulative_intersects_tensor( +std::tuple< + torch::Tensor, + torch::Tensor> +compute_cumulative_intersects_tensor( const int num_points, - torch::Tensor &num_tiles_hit, - int32_t &num_intersects + torch::Tensor &num_tiles_hit ) { // ref: // https://nvlabs.github.io/cub/structcub_1_1_device_scan.html#a9416ac1ea26f9fde669d83ddc883795a // allocate sum workspace CHECK_INPUT(num_tiles_hit); - void *sum_ws = nullptr; - size_t sum_ws_bytes; + torch::Tensor cum_tiles_hit = torch::zeros({num_points}, num_tiles_hit.options().dtype(torch::kInt32)); - cub::DeviceScan::InclusiveSum( - sum_ws, sum_ws_bytes, (int32_t *)num_tiles_hit.contiguous().data_ptr(), (int32_t *)cum_tiles_hit.contiguous().data_ptr(), num_points + + int32_t num_intersects; + compute_cumulative_intersects( + num_points, + num_tiles_hit.contiguous().data_ptr(), + num_intersects, + cum_tiles_hit.contiguous().data_ptr() ); - cudaMalloc(&sum_ws, sum_ws_bytes); - cub::DeviceScan::InclusiveSum( - sum_ws, sum_ws_bytes, (int32_t *)num_tiles_hit.contiguous().data_ptr(), (int32_t *)cum_tiles_hit.contiguous().data_ptr(), num_points + + return std::make_tuple(torch::tensor(num_intersects, num_tiles_hit.options().dtype(torch::kInt32)), + cum_tiles_hit); +} + +std::tuple< + torch::Tensor, + torch::Tensor> +map_gaussian_to_intersects_tensor( + const int num_points, + torch::Tensor &xys, + torch::Tensor &depths, + torch::Tensor &radii, + torch::Tensor &cum_tiles_hit, + const std::tuple tile_bounds +) { + CHECK_INPUT(xys); + CHECK_INPUT(depths); + CHECK_INPUT(radii); + CHECK_INPUT(cum_tiles_hit); + + dim3 tile_bounds_dim3; + tile_bounds_dim3.x = std::get<0>(tile_bounds); + tile_bounds_dim3.y = std::get<1>(tile_bounds); + tile_bounds_dim3.z = std::get<2>(tile_bounds); + + int32_t num_intersects = cum_tiles_hit[num_points - 1].item(); + + torch::Tensor gaussian_ids_unsorted = + torch::zeros({num_intersects}, xys.options().dtype(torch::kInt32)); + torch::Tensor isect_ids_unsorted = + torch::zeros({num_intersects}, xys.options().dtype(torch::kInt64)); + + map_gaussian_to_intersects<<< + (num_points + N_THREADS - 1) / N_THREADS, + N_THREADS>>>( + num_points, + (float2 *)xys.contiguous().data_ptr(), + depths.contiguous().data_ptr(), + radii.contiguous().data_ptr(), + cum_tiles_hit.contiguous().data_ptr(), + tile_bounds_dim3, + // Outputs. + isect_ids_unsorted.contiguous().data_ptr(), + gaussian_ids_unsorted.contiguous().data_ptr() ); - // cudaMemcpy( - // &num_intersects, - // &(cum_tiles_hit[num_points - 1]), - // sizeof(int32_t), - // cudaMemcpyDeviceToHost - // ); - cudaFree(sum_ws); - return cum_tiles_hit; + return std::make_tuple(gaussian_ids_unsorted, isect_ids_unsorted); } diff --git a/diff_rast/cuda/csrc/bindings.h b/diff_rast/cuda/csrc/bindings.h index 09d50a650..852004824 100644 --- a/diff_rast/cuda/csrc/bindings.h +++ b/diff_rast/cuda/csrc/bindings.h @@ -80,8 +80,22 @@ project_gaussians_backward_tensor( torch::Tensor &v_conic ); -torch::Tensor compute_cumulative_intersects_tensor( +std::tuple< + torch::Tensor, + torch::Tensor> +compute_cumulative_intersects_tensor( + const int num_points, + torch::Tensor &num_tiles_hit +); + +std::tuple< + torch::Tensor, + torch::Tensor> +map_gaussian_to_intersects_tensor( const int num_points, - torch::Tensor &num_tiles_hit, - int32_t &num_intersects + torch::Tensor &xys, + torch::Tensor &depths, + torch::Tensor &radii, + torch::Tensor &cum_tiles_hit, + const std::tuple tile_bounds ); \ No newline at end of file diff --git a/diff_rast/cuda/csrc/ext.cpp b/diff_rast/cuda/csrc/ext.cpp index 9e77bc3cf..74df8ddd5 100644 --- a/diff_rast/cuda/csrc/ext.cpp +++ b/diff_rast/cuda/csrc/ext.cpp @@ -10,4 +10,5 @@ PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { m.def("project_gaussians_backward", &project_gaussians_backward_tensor); m.def("compute_sh_forward", &compute_sh_forward_tensor); m.def("compute_sh_backward", &compute_sh_backward_tensor); + m.def("compute_cumulative_intersects", &compute_cumulative_intersects_tensor); } diff --git a/diff_rast/cuda/csrc/forward.cuh b/diff_rast/cuda/csrc/forward.cuh index 34ab8f84d..624b8ef50 100644 --- a/diff_rast/cuda/csrc/forward.cuh +++ b/diff_rast/cuda/csrc/forward.cuh @@ -78,3 +78,14 @@ __host__ __device__ float3 project_cov3d_ewa( __host__ __device__ void scale_rot_to_cov3d( const float3 scale, const float glob_scale, const float4 quat, float *cov3d ); + +__global__ void map_gaussian_to_intersects( + const int num_points, + const float2 *xys, + const float *depths, + const int *radii, + const int32_t *cum_tiles_hit, + const dim3 tile_bounds, + int64_t *isect_ids, + int32_t *gaussian_ids +); \ No newline at end of file diff --git a/tests/test_cumulative_intersects.py b/tests/test_cumulative_intersects.py new file mode 100644 index 000000000..4339b1912 --- /dev/null +++ b/tests/test_cumulative_intersects.py @@ -0,0 +1,28 @@ +import pytest +import torch + + +device = torch.device("cuda:0") + + +@pytest.mark.skipif(not torch.cuda.is_available(), reason="No CUDA device") +def test_cumulative_intersects(): + import diff_rast.cuda as _C + + torch.manual_seed(42) + + num_points = 10 + + num_tiles_hit = torch.randint(0, 100, (num_points,), device=device, dtype=torch.int32) + + num_intersects, cum_tiles_hit = _C.compute_cumulative_intersects(num_points, num_tiles_hit) + + _cum_tiles_hit = torch.cumsum(num_tiles_hit, dim=0, dtype=torch.int32) + _num_intersects = _cum_tiles_hit[-1] + + torch.testing.assert_close(num_intersects, _num_intersects) + torch.testing.assert_close(cum_tiles_hit, _cum_tiles_hit) + + +if __name__ == "__main__": + test_cumulative_intersects() From e1f1cdde4157f2036ed47f17bfd21ff256ff7c44 Mon Sep 17 00:00:00 2001 From: maturk Date: Mon, 2 Oct 2023 16:38:12 +0300 Subject: [PATCH 04/10] project gaussians test impl --- diff_rast/_torch_impl.py | 33 +++++++++---- tests/test_project_gaussians.py | 88 +++++++++++++++++++++++++++++++++ 2 files changed, 111 insertions(+), 10 deletions(-) create mode 100644 tests/test_project_gaussians.py diff --git a/diff_rast/_torch_impl.py b/diff_rast/_torch_impl.py index 5d2b25af8..498965dac 100644 --- a/diff_rast/_torch_impl.py +++ b/diff_rast/_torch_impl.py @@ -155,7 +155,13 @@ def scale_rot_to_cov3d(scale: Tensor, glob_scale: float, quat: Tensor) -> Tensor def project_cov3d_ewa( - mean3d: Tensor, cov3d: Tensor, viewmat: Tensor, fx: float, fy: float + mean3d: Tensor, + cov3d: Tensor, + viewmat: Tensor, + fx: float, + fy: float, + tan_fovx: float, + tan_fovy: float, ) -> Tensor: assert mean3d.shape[-1] == 3, mean3d.shape assert cov3d.shape[-2:] == (3, 3), cov3d.shape @@ -163,9 +169,13 @@ def project_cov3d_ewa( W = viewmat[..., :3, :3] # (..., 3, 3) p = viewmat[..., :3, 3] # (..., 3) t = torch.matmul(W, mean3d[..., None])[..., 0] + p # (..., 3) - raise NotImplementedError( - "Need to incorporate changes from this commit: 85e76e1c8b8e102145922f561800a74262ceb196!" - ) + + lim_x = 1.3 * torch.tensor([tan_fovx], device=mean3d.device) + lim_y = 1.3 * torch.tensor([tan_fovy], device=mean3d.device) + + t[..., 0] = t[..., 2] * torch.min(lim_x, torch.max(-lim_x, t[..., 0] / t[..., 2])) + t[..., 1] = t[..., 2] * torch.min(lim_y, torch.max(-lim_y, t[..., 1] / t[..., 2])) + rz = 1.0 / t[..., 2] # (...,) rz2 = rz**2 # (...,) J = torch.stack( @@ -178,8 +188,8 @@ def project_cov3d_ewa( T = J @ W # (..., 2, 3) cov2d = T @ cov3d @ T.transpose(-1, -2) # (..., 2, 2) # add a little blur along axes and (TODO save upper triangular elements) - cov2d[..., 0, 0] = cov2d[..., 0, 0] + 0.1 - cov2d[..., 1, 1] = cov2d[..., 1, 1] + 0.1 + cov2d[..., 0, 0] = cov2d[..., 0, 0] + 0.3 + cov2d[..., 1, 1] = cov2d[..., 1, 1] + 0.3 return cov2d @@ -215,11 +225,11 @@ def project_pix(mat, p, img_size, eps=1e-6): return torch.stack([u, v], dim=-1) -def clip_near_plane(p, viewmat, thresh=0.1): +def clip_near_plane(p, viewmat, clip_thresh=0.01): R = viewmat[..., :3, :3] T = viewmat[..., :3, 3] p_view = torch.matmul(R, p[..., None])[..., 0] + T - return p_view, p_view[..., 2] < thresh + return p_view, p_view[..., 2] < clip_thresh def get_tile_bbox(pix_center, pix_radius, tile_bounds, BLOCK_X=16, BLOCK_Y=16): @@ -259,10 +269,13 @@ def project_gaussians_forward( fy, img_size, tile_bounds, + clip_thresh=0.01, ): - p_view, is_close = clip_near_plane(means3d, viewmat) + tan_fovx = 0.5 * img_size[1] / fx + tan_fovy = 0.5 * img_size[0] / fy + p_view, is_close = clip_near_plane(means3d, viewmat, clip_thresh) cov3d = scale_rot_to_cov3d(scales, glob_scale, quats) - cov2d = project_cov3d_ewa(means3d, cov3d, viewmat, fx, fy) + cov2d = project_cov3d_ewa(means3d, cov3d, viewmat, fx, fy, tan_fovx, tan_fovy) conic, radius, det_valid = compute_cov2d_bounds(cov2d) center = project_pix(projmat, means3d, img_size) tile_min, tile_max = get_tile_bbox(center, radius, tile_bounds) diff --git a/tests/test_project_gaussians.py b/tests/test_project_gaussians.py new file mode 100644 index 000000000..7519c32be --- /dev/null +++ b/tests/test_project_gaussians.py @@ -0,0 +1,88 @@ +import pytest +import torch + +device = torch.device("cuda:0") + + +@pytest.mark.skipif(not torch.cuda.is_available, reason="No CUDA device") +def test_project_gaussians_forward(): + from diff_rast import _torch_impl + import diff_rast.cuda as _C + + torch.manual_seed(42) + + num_points = 100 + means3d = torch.randn((num_points, 3), device=device, requires_grad=True) + scales = torch.randn((num_points, 3), device=device) + glob_scale = 0.3 + quats = torch.randn((num_points, 4), device=device) + quats /= torch.linalg.norm(quats, dim=-1, keepdim=True) + viewmat = torch.eye(4, device=device) + projmat = torch.eye(4, device=device) + fx, fy = 3.0, 3.0 + H, W = 512, 512 + clip_thresh = 0.01 + + BLOCK_X, BLOCK_Y = 16, 16 + tile_bounds = (W + BLOCK_X - 1) // BLOCK_X, (H + BLOCK_Y - 1) // BLOCK_Y, 1 + + ( + cov3d, + xys, + depths, + radii, + conics, + num_tiles_hit, + ) = _C.project_gaussians_forward( + num_points, + means3d, + scales, + glob_scale, + quats, + viewmat, + projmat, + fx, + fy, + H, + W, + tile_bounds, + clip_thresh, + ) + + ( + _cov3d, + _xys, + _depths, + _radii, + _conics, + _num_tiles_hit, + _masks, + ) = _torch_impl.project_gaussians_forward( + means3d, + scales, + glob_scale, + quats, + viewmat, + projmat, + fx, + fy, + (H, W), + tile_bounds, + clip_thresh, + ) + + torch.testing.assert_close( + cov3d[_masks], + _cov3d.view(-1, 9)[_masks][:, [0, 1, 2, 4, 5, 8]], + atol=1e-5, + rtol=1e-5, + ) + torch.testing.assert_close(xys[_masks], _xys[_masks], atol=1e-4, rtol=1e-4,) + torch.testing.assert_close(depths[_masks], _depths[_masks]) + torch.testing.assert_close(radii[_masks], _radii[_masks]) + torch.testing.assert_close(conics[_masks], _conics[_masks]) + torch.testing.assert_close(num_tiles_hit[_masks], _num_tiles_hit[_masks]) + + +if __name__ == "__main__": + test_project_gaussians_forward() \ No newline at end of file From 69e86a78e1d2e78ffb3a44f9180ac2f6bc483d75 Mon Sep 17 00:00:00 2001 From: Zhuoyang Date: Mon, 2 Oct 2023 13:24:49 -0700 Subject: [PATCH 05/10] finish map_gaussians --- diff_rast/_torch_impl.py | 37 +++++++++++++++- diff_rast/cuda/__init__.py | 1 + diff_rast/cuda/csrc/bindings.cu | 2 +- diff_rast/cuda/csrc/ext.cpp | 1 + tests/test_map_gaussians.py | 78 +++++++++++++++++++++++++++++++++ 5 files changed, 117 insertions(+), 2 deletions(-) create mode 100644 tests/test_map_gaussians.py diff --git a/diff_rast/_torch_impl.py b/diff_rast/_torch_impl.py index 498965dac..81daa9a9b 100644 --- a/diff_rast/_torch_impl.py +++ b/diff_rast/_torch_impl.py @@ -1,7 +1,7 @@ """Pure PyTorch implementations of various functions""" - import torch import torch.nn.functional as F +import struct from jaxtyping import Float from torch import Tensor @@ -291,3 +291,38 @@ def project_gaussians_forward( conics = conic return cov3d, xys, depths, radii, conics, num_tiles_hit, mask + +def map_gaussian_to_intersects( + num_points, + xys, + depths, + radii, + cum_tiles_hit, + tile_bounds +): + num_intersects = cum_tiles_hit[-1] + isect_ids = torch.zeros(num_intersects, dtype=torch.int64, device=xys.device) + gaussian_ids = torch.zeros(num_intersects, dtype=torch.int32, device=xys.device) + + for idx in range(num_points): + if radii[idx] <= 0: break + + tile_min, tile_max = get_tile_bbox(xys[idx], radii[idx], tile_bounds) + + cur_idx = 0 if idx == 0 else cum_tiles_hit[idx - 1] + + # Get raw byte representation of the float value at the given index + raw_bytes = struct.pack('f', depths[idx]) + + # Interpret those bytes as an int32_t + depth_id_n = struct.unpack('i', raw_bytes)[0] + + for i in range(tile_min[1], tile_max[1]): + for j in range(tile_min[0], tile_max[0]): + tile_id = i * tile_bounds[0] + j + isect_ids[cur_idx] = (tile_id << 32) | depth_id_n + gaussian_ids[cur_idx] = idx + cur_idx += 1 + + return isect_ids, gaussian_ids + diff --git a/diff_rast/cuda/__init__.py b/diff_rast/cuda/__init__.py index adb65700c..d418081cd 100644 --- a/diff_rast/cuda/__init__.py +++ b/diff_rast/cuda/__init__.py @@ -19,3 +19,4 @@ def call_cuda(*args, **kwargs): compute_sh_forward = _make_lazy_cuda_func("compute_sh_forward") compute_sh_backward = _make_lazy_cuda_func("compute_sh_backward") compute_cumulative_intersects = _make_lazy_cuda_func("compute_cumulative_intersects") +map_gaussian_to_intersects = _make_lazy_cuda_func("map_gaussian_to_intersects") diff --git a/diff_rast/cuda/csrc/bindings.cu b/diff_rast/cuda/csrc/bindings.cu index 2cfb4e5a9..9b5cc8753 100644 --- a/diff_rast/cuda/csrc/bindings.cu +++ b/diff_rast/cuda/csrc/bindings.cu @@ -325,5 +325,5 @@ map_gaussian_to_intersects_tensor( gaussian_ids_unsorted.contiguous().data_ptr() ); - return std::make_tuple(gaussian_ids_unsorted, isect_ids_unsorted); + return std::make_tuple(isect_ids_unsorted, gaussian_ids_unsorted); } diff --git a/diff_rast/cuda/csrc/ext.cpp b/diff_rast/cuda/csrc/ext.cpp index 74df8ddd5..1df3cd8ae 100644 --- a/diff_rast/cuda/csrc/ext.cpp +++ b/diff_rast/cuda/csrc/ext.cpp @@ -11,4 +11,5 @@ PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { m.def("compute_sh_forward", &compute_sh_forward_tensor); m.def("compute_sh_backward", &compute_sh_backward_tensor); m.def("compute_cumulative_intersects", &compute_cumulative_intersects_tensor); + m.def("map_gaussian_to_intersects", &map_gaussian_to_intersects_tensor); } diff --git a/tests/test_map_gaussians.py b/tests/test_map_gaussians.py new file mode 100644 index 000000000..fc1653878 --- /dev/null +++ b/tests/test_map_gaussians.py @@ -0,0 +1,78 @@ +import pytest +import torch + + +device = torch.device("cuda:0") + + +@pytest.mark.skipif(not torch.cuda.is_available(), reason="No CUDA device") +def test_map_gaussians(): + from diff_rast import _torch_impl + import diff_rast.cuda as _C + + torch.manual_seed(42) + + num_points = 100 + means3d = torch.randn((num_points, 3), device=device, requires_grad=True) + scales = torch.randn((num_points, 3), device=device) + glob_scale = 0.3 + quats = torch.randn((num_points, 4), device=device) + quats /= torch.linalg.norm(quats, dim=-1, keepdim=True) + viewmat = torch.eye(4, device=device) + projmat = torch.eye(4, device=device) + fx, fy = 3.0, 3.0 + H, W = 512, 512 + clip_thresh = 0.01 + + BLOCK_X, BLOCK_Y = 16, 16 + tile_bounds = (W + BLOCK_X - 1) // BLOCK_X, (H + BLOCK_Y - 1) // BLOCK_Y, 1 + + ( + _cov3d, + _xys, + _depths, + _radii, + _conics, + _num_tiles_hit, + _masks, + ) = _torch_impl.project_gaussians_forward( + means3d, + scales, + glob_scale, + quats, + viewmat, + projmat, + fx, + fy, + (H, W), + tile_bounds, + clip_thresh, + ) + + _cum_tiles_hit = torch.cumsum(_num_tiles_hit, dim=0, dtype=torch.int32) + _depths = _depths.contiguous() + + isect_ids, gaussian_ids = _C.map_gaussian_to_intersects( + num_points, + _xys, + _depths, + _radii, + _cum_tiles_hit, + tile_bounds + ) + + _isect_ids, _gaussian_ids = _torch_impl.map_gaussian_to_intersects( + num_points, + _xys, + _depths, + _radii, + _cum_tiles_hit, + tile_bounds + ) + + torch.testing.assert_close(gaussian_ids, _gaussian_ids) + torch.testing.assert_close(isect_ids, _isect_ids) + + +if __name__ == "__main__": + test_map_gaussians() From 22c1e21b65e9d798d6c569dac9f5047124a1ae44 Mon Sep 17 00:00:00 2001 From: Zhuoyang Date: Mon, 2 Oct 2023 13:39:38 -0700 Subject: [PATCH 06/10] black formatting --- diff_rast/_torch_impl.py | 26 +++++++++++--------------- diff_rast/project_gaussians.py | 8 +++++++- tests/test_cumulative_intersects.py | 12 ++++++++---- tests/test_map_gaussians.py | 22 ++++++---------------- tests/test_project_gaussians.py | 11 ++++++++--- 5 files changed, 40 insertions(+), 39 deletions(-) diff --git a/diff_rast/_torch_impl.py b/diff_rast/_torch_impl.py index 81daa9a9b..f100d1624 100644 --- a/diff_rast/_torch_impl.py +++ b/diff_rast/_torch_impl.py @@ -292,37 +292,33 @@ def project_gaussians_forward( return cov3d, xys, depths, radii, conics, num_tiles_hit, mask + def map_gaussian_to_intersects( - num_points, - xys, - depths, - radii, - cum_tiles_hit, - tile_bounds + num_points, xys, depths, radii, cum_tiles_hit, tile_bounds ): num_intersects = cum_tiles_hit[-1] isect_ids = torch.zeros(num_intersects, dtype=torch.int64, device=xys.device) gaussian_ids = torch.zeros(num_intersects, dtype=torch.int32, device=xys.device) - + for idx in range(num_points): - if radii[idx] <= 0: break - + if radii[idx] <= 0: + break + tile_min, tile_max = get_tile_bbox(xys[idx], radii[idx], tile_bounds) - + cur_idx = 0 if idx == 0 else cum_tiles_hit[idx - 1] # Get raw byte representation of the float value at the given index - raw_bytes = struct.pack('f', depths[idx]) + raw_bytes = struct.pack("f", depths[idx]) # Interpret those bytes as an int32_t - depth_id_n = struct.unpack('i', raw_bytes)[0] - + depth_id_n = struct.unpack("i", raw_bytes)[0] + for i in range(tile_min[1], tile_max[1]): for j in range(tile_min[0], tile_max[0]): tile_id = i * tile_bounds[0] + j isect_ids[cur_idx] = (tile_id << 32) | depth_id_n gaussian_ids[cur_idx] = idx cur_idx += 1 - + return isect_ids, gaussian_ids - diff --git a/diff_rast/project_gaussians.py b/diff_rast/project_gaussians.py index ad9e53a54..1d3147e70 100644 --- a/diff_rast/project_gaussians.py +++ b/diff_rast/project_gaussians.py @@ -102,7 +102,13 @@ def backward(ctx, v_xys, v_depths, v_radii, v_conics, v_num_tiles_hit, v_cov3d): conics, ) = ctx.saved_tensors - (v_cov2d, v_cov3d, v_mean3d, v_scale, v_quat,) = _C.project_gaussians_backward( + ( + v_cov2d, + v_cov3d, + v_mean3d, + v_scale, + v_quat, + ) = _C.project_gaussians_backward( ctx.num_points, means3d, scales, diff --git a/tests/test_cumulative_intersects.py b/tests/test_cumulative_intersects.py index 4339b1912..bebef8bc0 100644 --- a/tests/test_cumulative_intersects.py +++ b/tests/test_cumulative_intersects.py @@ -13,10 +13,14 @@ def test_cumulative_intersects(): num_points = 10 - num_tiles_hit = torch.randint(0, 100, (num_points,), device=device, dtype=torch.int32) - - num_intersects, cum_tiles_hit = _C.compute_cumulative_intersects(num_points, num_tiles_hit) - + num_tiles_hit = torch.randint( + 0, 100, (num_points,), device=device, dtype=torch.int32 + ) + + num_intersects, cum_tiles_hit = _C.compute_cumulative_intersects( + num_points, num_tiles_hit + ) + _cum_tiles_hit = torch.cumsum(num_tiles_hit, dim=0, dtype=torch.int32) _num_intersects = _cum_tiles_hit[-1] diff --git a/tests/test_map_gaussians.py b/tests/test_map_gaussians.py index fc1653878..394fe7199 100644 --- a/tests/test_map_gaussians.py +++ b/tests/test_map_gaussians.py @@ -48,28 +48,18 @@ def test_map_gaussians(): tile_bounds, clip_thresh, ) - + _cum_tiles_hit = torch.cumsum(_num_tiles_hit, dim=0, dtype=torch.int32) _depths = _depths.contiguous() - + isect_ids, gaussian_ids = _C.map_gaussian_to_intersects( - num_points, - _xys, - _depths, - _radii, - _cum_tiles_hit, - tile_bounds + num_points, _xys, _depths, _radii, _cum_tiles_hit, tile_bounds ) - + _isect_ids, _gaussian_ids = _torch_impl.map_gaussian_to_intersects( - num_points, - _xys, - _depths, - _radii, - _cum_tiles_hit, - tile_bounds + num_points, _xys, _depths, _radii, _cum_tiles_hit, tile_bounds ) - + torch.testing.assert_close(gaussian_ids, _gaussian_ids) torch.testing.assert_close(isect_ids, _isect_ids) diff --git a/tests/test_project_gaussians.py b/tests/test_project_gaussians.py index 7519c32be..21846b5ca 100644 --- a/tests/test_project_gaussians.py +++ b/tests/test_project_gaussians.py @@ -43,7 +43,7 @@ def test_project_gaussians_forward(): projmat, fx, fy, - H, + H, W, tile_bounds, clip_thresh, @@ -77,7 +77,12 @@ def test_project_gaussians_forward(): atol=1e-5, rtol=1e-5, ) - torch.testing.assert_close(xys[_masks], _xys[_masks], atol=1e-4, rtol=1e-4,) + torch.testing.assert_close( + xys[_masks], + _xys[_masks], + atol=1e-4, + rtol=1e-4, + ) torch.testing.assert_close(depths[_masks], _depths[_masks]) torch.testing.assert_close(radii[_masks], _radii[_masks]) torch.testing.assert_close(conics[_masks], _conics[_masks]) @@ -85,4 +90,4 @@ def test_project_gaussians_forward(): if __name__ == "__main__": - test_project_gaussians_forward() \ No newline at end of file + test_project_gaussians_forward() From 5dbf8fab29e9e930b03ba7cf26526e18c04c5d7e Mon Sep 17 00:00:00 2001 From: Zhuoyang Date: Mon, 2 Oct 2023 13:46:40 -0700 Subject: [PATCH 07/10] reformat to black version 22.3.0 --- diff_rast/project_gaussians.py | 8 +------- tests/test_project_gaussians.py | 9 +-------- 2 files changed, 2 insertions(+), 15 deletions(-) diff --git a/diff_rast/project_gaussians.py b/diff_rast/project_gaussians.py index 1d3147e70..ad9e53a54 100644 --- a/diff_rast/project_gaussians.py +++ b/diff_rast/project_gaussians.py @@ -102,13 +102,7 @@ def backward(ctx, v_xys, v_depths, v_radii, v_conics, v_num_tiles_hit, v_cov3d): conics, ) = ctx.saved_tensors - ( - v_cov2d, - v_cov3d, - v_mean3d, - v_scale, - v_quat, - ) = _C.project_gaussians_backward( + (v_cov2d, v_cov3d, v_mean3d, v_scale, v_quat,) = _C.project_gaussians_backward( ctx.num_points, means3d, scales, diff --git a/tests/test_project_gaussians.py b/tests/test_project_gaussians.py index 21846b5ca..c74c21394 100644 --- a/tests/test_project_gaussians.py +++ b/tests/test_project_gaussians.py @@ -26,14 +26,7 @@ def test_project_gaussians_forward(): BLOCK_X, BLOCK_Y = 16, 16 tile_bounds = (W + BLOCK_X - 1) // BLOCK_X, (H + BLOCK_Y - 1) // BLOCK_Y, 1 - ( - cov3d, - xys, - depths, - radii, - conics, - num_tiles_hit, - ) = _C.project_gaussians_forward( + (cov3d, xys, depths, radii, conics, num_tiles_hit,) = _C.project_gaussians_forward( num_points, means3d, scales, From 6de479ac5ee1df0509778518e8586ac589d1a004 Mon Sep 17 00:00:00 2001 From: Zhuoyang Date: Mon, 2 Oct 2023 13:50:45 -0700 Subject: [PATCH 08/10] fix cuda check --- tests/test_project_gaussians.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_project_gaussians.py b/tests/test_project_gaussians.py index c74c21394..9c709820b 100644 --- a/tests/test_project_gaussians.py +++ b/tests/test_project_gaussians.py @@ -4,7 +4,7 @@ device = torch.device("cuda:0") -@pytest.mark.skipif(not torch.cuda.is_available, reason="No CUDA device") +@pytest.mark.skipif(not torch.cuda.is_available(), reason="No CUDA device") def test_project_gaussians_forward(): from diff_rast import _torch_impl import diff_rast.cuda as _C From bcdbbc4864bd249d81efba8f8797729ce54c26ce Mon Sep 17 00:00:00 2001 From: maturk Date: Tue, 3 Oct 2023 11:36:35 +0300 Subject: [PATCH 09/10] clang-format --- diff_rast/cuda/csrc/CMakeLists.txt | 2 +- diff_rast/cuda/csrc/backward.cu | 8 +-- diff_rast/cuda/csrc/bindings.cu | 37 ++++++------ diff_rast/cuda/csrc/bindings.h | 13 +--- diff_rast/cuda/csrc/helpers.cuh | 79 +++++++++++++------------ diff_rast/cuda/csrc/serial_backward.cu | 4 +- diff_rast/cuda/csrc/serial_backward.cuh | 13 ++-- diff_rast/cuda/csrc/sh.cuh | 9 +-- 8 files changed, 78 insertions(+), 87 deletions(-) diff --git a/diff_rast/cuda/csrc/CMakeLists.txt b/diff_rast/cuda/csrc/CMakeLists.txt index 24cbc8364..12a4ba8e3 100644 --- a/diff_rast/cuda/csrc/CMakeLists.txt +++ b/diff_rast/cuda/csrc/CMakeLists.txt @@ -54,4 +54,4 @@ target_include_directories(check_serial_backward PRIVATE ) target_include_directories(check_serial_forward PRIVATE ${PROJECT_SOURCE_DIR}/third_party/glm -) +) \ No newline at end of file diff --git a/diff_rast/cuda/csrc/backward.cu b/diff_rast/cuda/csrc/backward.cu index a94a265cb..fedc475b1 100644 --- a/diff_rast/cuda/csrc/backward.cu +++ b/diff_rast/cuda/csrc/backward.cu @@ -4,7 +4,7 @@ namespace cg = cooperative_groups; -template +template __global__ void rasterize_backward_kernel( const dim3 tile_bounds, const dim3 img_size, @@ -49,7 +49,8 @@ __global__ void rasterize_backward_kernel( float T_final = final_Ts[pix_id]; float T = T_final; // the contribution from gaussians behind the current one - float S[CHANNELS] = {0.f}; // TODO: this currently doesn't match the channel count input. + float S[CHANNELS] = { + 0.f}; // TODO: this currently doesn't match the channel count input. // S[0] = 0.0; // S[1] = 0.0; // S[2] = 0.0; @@ -96,7 +97,6 @@ __global__ void rasterize_backward_kernel( S[c] += rgbs[CHANNELS * g + c] * fac; } - // v_alpha = (rgb.x * T - S.x * ra) * v_out.x // + (rgb.y * T - S.y * ra) * v_out.y // + (rgb.z * T - S.z * ra) * v_out.z; @@ -146,7 +146,7 @@ void rasterize_backward_impl( float *v_opacity ) { - rasterize_backward_kernel<3> <<>>( + rasterize_backward_kernel<3><<>>( tile_bounds, img_size, gaussians_ids_sorted, diff --git a/diff_rast/cuda/csrc/bindings.cu b/diff_rast/cuda/csrc/bindings.cu index 9b5cc8753..672384b25 100644 --- a/diff_rast/cuda/csrc/bindings.cu +++ b/diff_rast/cuda/csrc/bindings.cu @@ -31,8 +31,9 @@ __global__ void compute_cov2d_bounds_forward_kernel( float3 conic; float radius; float3 cov2d{ - (float)covs2d[index], (float)covs2d[index + 1], (float)covs2d[index + 2] - }; + (float)covs2d[index], + (float)covs2d[index + 1], + (float)covs2d[index + 2]}; compute_cov2d_bounds(cov2d, conic, radius); conics[index] = conic.x; conics[index + 1] = conic.y; @@ -256,21 +257,18 @@ project_gaussians_backward_tensor( return std::make_tuple(v_cov2d, v_cov3d, v_mean3d, v_scale, v_quat); } -std::tuple< - torch::Tensor, - torch::Tensor> -compute_cumulative_intersects_tensor( - const int num_points, - torch::Tensor &num_tiles_hit +std::tuple compute_cumulative_intersects_tensor( + const int num_points, torch::Tensor &num_tiles_hit ) { // ref: // https://nvlabs.github.io/cub/structcub_1_1_device_scan.html#a9416ac1ea26f9fde669d83ddc883795a // allocate sum workspace CHECK_INPUT(num_tiles_hit); - - torch::Tensor cum_tiles_hit = - torch::zeros({num_points}, num_tiles_hit.options().dtype(torch::kInt32)); - + + torch::Tensor cum_tiles_hit = torch::zeros( + {num_points}, num_tiles_hit.options().dtype(torch::kInt32) + ); + int32_t num_intersects; compute_cumulative_intersects( num_points, @@ -279,14 +277,15 @@ compute_cumulative_intersects_tensor( cum_tiles_hit.contiguous().data_ptr() ); - return std::make_tuple(torch::tensor(num_intersects, num_tiles_hit.options().dtype(torch::kInt32)), - cum_tiles_hit); + return std::make_tuple( + torch::tensor( + num_intersects, num_tiles_hit.options().dtype(torch::kInt32) + ), + cum_tiles_hit + ); } -std::tuple< - torch::Tensor, - torch::Tensor> -map_gaussian_to_intersects_tensor( +std::tuple map_gaussian_to_intersects_tensor( const int num_points, torch::Tensor &xys, torch::Tensor &depths, @@ -310,7 +309,7 @@ map_gaussian_to_intersects_tensor( torch::zeros({num_intersects}, xys.options().dtype(torch::kInt32)); torch::Tensor isect_ids_unsorted = torch::zeros({num_intersects}, xys.options().dtype(torch::kInt64)); - + map_gaussian_to_intersects<<< (num_points + N_THREADS - 1) / N_THREADS, N_THREADS>>>( diff --git a/diff_rast/cuda/csrc/bindings.h b/diff_rast/cuda/csrc/bindings.h index 852004824..574c121de 100644 --- a/diff_rast/cuda/csrc/bindings.h +++ b/diff_rast/cuda/csrc/bindings.h @@ -80,18 +80,11 @@ project_gaussians_backward_tensor( torch::Tensor &v_conic ); -std::tuple< - torch::Tensor, - torch::Tensor> -compute_cumulative_intersects_tensor( - const int num_points, - torch::Tensor &num_tiles_hit +std::tuple compute_cumulative_intersects_tensor( + const int num_points, torch::Tensor &num_tiles_hit ); -std::tuple< - torch::Tensor, - torch::Tensor> -map_gaussian_to_intersects_tensor( +std::tuple map_gaussian_to_intersects_tensor( const int num_points, torch::Tensor &xys, torch::Tensor &depths, diff --git a/diff_rast/cuda/csrc/helpers.cuh b/diff_rast/cuda/csrc/helpers.cuh index 884857d94..962c29dfc 100644 --- a/diff_rast/cuda/csrc/helpers.cuh +++ b/diff_rast/cuda/csrc/helpers.cuh @@ -31,7 +31,8 @@ inline __host__ __device__ void get_tile_bbox( uint2 &tile_min, uint2 &tile_max ) { - // gets gaussian dimensions in tile space, i.e. the span of a gaussian in tile_grid (image divided into tiles) + // gets gaussian dimensions in tile space, i.e. the span of a gaussian in + // tile_grid (image divided into tiles) float2 tile_center = { pix_center.x / (float)BLOCK_X, pix_center.y / (float)BLOCK_Y}; float2 tile_radius = { @@ -44,7 +45,8 @@ compute_cov2d_bounds(const float3 cov2d, float3 &conic, float &radius) { // find eigenvalues of 2d covariance matrix // expects upper triangular values of cov matrix as float3 // then compute the radius and conic dimensions - // the conic is the inverse cov2d matrix, represented here with upper triangular values. + // the conic is the inverse cov2d matrix, represented here with upper + // triangular values. float det = cov2d.x * cov2d.z - cov2d.y * cov2d.y; if (det == 0.f) return false; @@ -64,8 +66,9 @@ compute_cov2d_bounds(const float3 cov2d, float3 &conic, float &radius) { } // compute vjp from df/d_conic to df/c_cov2d -inline __host__ __device__ void -cov2d_to_conic_vjp(const float3 &conic, const float3 &v_conic, float3 &v_cov2d) { +inline __host__ __device__ void cov2d_to_conic_vjp( + const float3 &conic, const float3 &v_conic, float3 &v_cov2d +) { // conic = inverse cov2d // df/d_cov2d = -conic * df/d_conic * conic glm::mat2 X = glm::mat2(conic.x, conic.y, conic.y, conic.z); @@ -77,7 +80,8 @@ cov2d_to_conic_vjp(const float3 &conic, const float3 &v_conic, float3 &v_cov2d) } // helper for applying R * p + T, expect mat to be ROW MAJOR -inline __host__ __device__ float3 transform_4x3(const float *mat, const float3 p) { +inline __host__ __device__ float3 +transform_4x3(const float *mat, const float3 p) { float3 out = { mat[0] * p.x + mat[1] * p.y + mat[2] * p.z + mat[3], mat[4] * p.x + mat[5] * p.y + mat[6] * p.z + mat[7], @@ -88,7 +92,8 @@ inline __host__ __device__ float3 transform_4x3(const float *mat, const float3 p // helper to apply 4x4 transform to 3d vector, return homo coords // expects mat to be ROW MAJOR -inline __host__ __device__ float4 transform_4x4(const float *mat, const float3 p) { +inline __host__ __device__ float4 +transform_4x4(const float *mat, const float3 p) { float4 out = { mat[0] * p.x + mat[1] * p.y + mat[2] * p.z + mat[3], mat[4] * p.x + mat[5] * p.y + mat[6] * p.z + mat[7], @@ -117,8 +122,7 @@ inline __host__ __device__ float3 project_pix_vjp( float3 v_ndc = {0.5f * img_size.x * v_xy.x, 0.5f * img_size.y * v_xy.y}; float4 v_proj = { - v_ndc.x * rw, v_ndc.y * rw, 0., -(v_ndc.x + v_ndc.y) * rw * rw - }; + v_ndc.x * rw, v_ndc.y * rw, 0., -(v_ndc.x + v_ndc.y) * rw * rw}; // df / d_world = df / d_cam * d_cam / d_world // = v_proj * P[:3, :3] return { @@ -164,36 +168,36 @@ quat_to_rotmat_vjp(const float4 quat, const glm::mat3 v_R) { float4 v_quat; // v_R is COLUMN MAJOR // w element stored in x field - v_quat.x = 2.f * ( - // v_quat.w = 2.f * ( - x * (v_R[1][2] - v_R[2][1]) - + y * (v_R[2][0] - v_R[0][2]) - + z * (v_R[0][1] - v_R[1][0]) - ); + v_quat.x = + 2.f * ( + // v_quat.w = 2.f * ( + x * (v_R[1][2] - v_R[2][1]) + y * (v_R[2][0] - v_R[0][2]) + + z * (v_R[0][1] - v_R[1][0]) + ); // x element in y field - v_quat.y = 2.f * ( - // v_quat.x = 2.f * ( - -2.f * x * (v_R[1][1] + v_R[2][2]) - + y * (v_R[0][1] + v_R[1][0]) - + z * (v_R[0][2] + v_R[2][0]) - + w * (v_R[1][2] - v_R[2][1]) - ); + v_quat.y = + 2.f * + ( + // v_quat.x = 2.f * ( + -2.f * x * (v_R[1][1] + v_R[2][2]) + y * (v_R[0][1] + v_R[1][0]) + + z * (v_R[0][2] + v_R[2][0]) + w * (v_R[1][2] - v_R[2][1]) + ); // y element in z field - v_quat.z = 2.f * ( - // v_quat.y = 2.f * ( - x * (v_R[0][1] + v_R[1][0]) - - 2.f * y * (v_R[0][0] + v_R[2][2]) - + z * (v_R[1][2] + v_R[2][1]) - + w * (v_R[2][0] - v_R[0][2]) - ); + v_quat.z = + 2.f * + ( + // v_quat.y = 2.f * ( + x * (v_R[0][1] + v_R[1][0]) - 2.f * y * (v_R[0][0] + v_R[2][2]) + + z * (v_R[1][2] + v_R[2][1]) + w * (v_R[2][0] - v_R[0][2]) + ); // z element in w field - v_quat.w = 2.f * ( - // v_quat.z = 2.f * ( - x * (v_R[0][2] + v_R[2][0]) - + y * (v_R[1][2] + v_R[2][1]) - - 2.f * z * (v_R[0][0] + v_R[1][1]) - + w * (v_R[0][1] - v_R[1][0]) - ); + v_quat.w = + 2.f * + ( + // v_quat.z = 2.f * ( + x * (v_R[0][2] + v_R[2][0]) + y * (v_R[1][2] + v_R[2][1]) - + 2.f * z * (v_R[0][0] + v_R[1][1]) + w * (v_R[0][1] - v_R[1][0]) + ); return v_quat; } @@ -207,8 +211,9 @@ scale_to_mat(const float3 scale, const float glob_scale) { } // device helper for culling near points -inline __host__ __device__ bool -clip_near_plane(const float3 p, const float *viewmat, float3 &p_view, float thresh) { +inline __host__ __device__ bool clip_near_plane( + const float3 p, const float *viewmat, float3 &p_view, float thresh +) { p_view = transform_4x3(viewmat, p); if (p_view.z <= thresh) { return true; diff --git a/diff_rast/cuda/csrc/serial_backward.cu b/diff_rast/cuda/csrc/serial_backward.cu index 14a28d3d8..a902b3234 100644 --- a/diff_rast/cuda/csrc/serial_backward.cu +++ b/diff_rast/cuda/csrc/serial_backward.cu @@ -164,8 +164,8 @@ computeConicBackward(const float3 &cov2D, const float3 &dL_dconic) { float denom2inv = 1.0f / ((denom * denom) + 0.0000001f); if (denom2inv != 0) { - // This is slightly different from the original implementation, but we include this line to make - // equality checks easier. + // This is slightly different from the original implementation, but we + // include this line to make equality checks easier. float denom2inv = 1.0f / (denom * denom); // Gradients of loss w.r.t. entries of 2D covariance matrix, // given gradients of loss w.r.t. conic matrix (inverse covariance diff --git a/diff_rast/cuda/csrc/serial_backward.cuh b/diff_rast/cuda/csrc/serial_backward.cuh index d26d38c60..58b5930f3 100644 --- a/diff_rast/cuda/csrc/serial_backward.cuh +++ b/diff_rast/cuda/csrc/serial_backward.cuh @@ -1,23 +1,20 @@ #include "cuda_runtime.h" - __host__ __device__ float3 projectMean2DBackward( - const float3 m, const float* proj, const float2 dL_dmean2D + const float3 m, const float *proj, const float2 dL_dmean2D ); __host__ __device__ void computeCov3DBackward( const float3 scale, const float mod, const float4 rot, - const float* dL_dcov3D, + const float *dL_dcov3D, float3 &dL_dscale, float4 &dL_dq ); -__host__ __device__ float3 computeConicBackward( - const float3 &cov2D, - const float3 &dL_dconic -); +__host__ __device__ float3 +computeConicBackward(const float3 &cov2D, const float3 &dL_dconic); __host__ __device__ void computeCov2DBackward( const float3 &mean, @@ -25,7 +22,7 @@ __host__ __device__ void computeCov2DBackward( const float *view_matrix, const float h_x, const float h_y, - const float tan_fovx, + const float tan_fovx, const float tan_fovy, const float3 &dL_dcov2d, float3 &dL_dmean, diff --git a/diff_rast/cuda/csrc/sh.cuh b/diff_rast/cuda/csrc/sh.cuh index eecfb654d..f709e155d 100644 --- a/diff_rast/cuda/csrc/sh.cuh +++ b/diff_rast/cuda/csrc/sh.cuh @@ -10,8 +10,7 @@ __host__ __device__ const float SH_C2[] = { -1.0925484305920792f, 0.31539156525252005f, -1.0925484305920792f, - 0.5462742152960396f -}; + 0.5462742152960396f}; __host__ __device__ const float SH_C3[] = { -0.5900435899266435f, 2.890611442640554f, @@ -19,8 +18,7 @@ __host__ __device__ const float SH_C3[] = { 0.3731763325901154f, -0.4570457994644658f, 1.445305721320277f, - -0.5900435899266435f -}; + -0.5900435899266435f}; __host__ __device__ const float SH_C4[] = { 2.5033429417967046f, -1.7701307697799304, @@ -30,8 +28,7 @@ __host__ __device__ const float SH_C4[] = { -0.6690465435572892f, 0.47308734787878004f, -1.7701307697799304f, - 0.6258357354491761f -}; + 0.6258357354491761f}; __host__ __device__ unsigned num_sh_bases(const unsigned degree) { if (degree == 0) From e179799f44b579d5992cfc073a0076ef7a611320 Mon Sep 17 00:00:00 2001 From: maturk Date: Tue, 3 Oct 2023 11:39:34 +0300 Subject: [PATCH 10/10] extra space for consistencty --- tests/test_project_gaussians.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/test_project_gaussians.py b/tests/test_project_gaussians.py index 9c709820b..b0e761e43 100644 --- a/tests/test_project_gaussians.py +++ b/tests/test_project_gaussians.py @@ -1,6 +1,7 @@ import pytest import torch + device = torch.device("cuda:0")