Skip to content

Commit

Permalink
small optimizations
Browse files Browse the repository at this point in the history
  • Loading branch information
pizzoli committed Nov 14, 2015
1 parent be78399 commit e0668a6
Show file tree
Hide file tree
Showing 7 changed files with 51 additions and 29 deletions.
5 changes: 5 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,11 @@ SET(CMAKE_BUILD_TYPE Release) # Release, RelWithDebInfo

find_package(CUDA REQUIRED)
SET(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-O3)

# Specify compute capability
#list(APPEND CUDA_NVCC_FLAGS -arch=sm_30)
#list(APPEND CUDA_NVCC_FLAGS -gencode arch=compute_30,code=sm_30 -use_fast_math)

SET(CUDA_PROPAGATE_HOST_FLAGS OFF)

SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3")
Expand Down
4 changes: 3 additions & 1 deletion include/rmd/mvs_device_data.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -41,9 +41,11 @@ struct CorrPatch
// Side length, in pixels, of the square correlation patch.
// The guard allows a build to predefine a different value.
#ifndef RMD_CORR_PATCH_SIDE
#define RMD_CORR_PATCH_SIDE 5
#endif
// Signed offset added to a pixel coordinate to reach the first row/column
// of the patch (-2 for the default side of 5).
// The expansion is fully parenthesized so that it behaves as a single value
// inside any larger expression: without the parentheses,
// 2*RMD_CORR_PATCH_OFFSET would expand to 2*-5/2 == -5 instead of -4.
#define RMD_CORR_PATCH_OFFSET (-(RMD_CORR_PATCH_SIDE)/2)
CorrPatch()
: side(RMD_CORR_PATCH_SIDE)
, offset(make_int2(-side/2, -side/2))
, offset(make_int2(RMD_CORR_PATCH_OFFSET,
RMD_CORR_PATCH_OFFSET))
{ }
const int side;
const int2 offset;
Expand Down
3 changes: 3 additions & 0 deletions include/rmd/seed_matrix.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,9 @@ private:
dim3 dim_grid_;
// Image reduction to compute seed statistics
ImageReducer<int> *img_reducer_;

// Image size to be copied to constant memory
int host_img_size_xy_[2];
};

} // rmd namespace
Expand Down
4 changes: 4 additions & 0 deletions include/rmd/texture_memory.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,10 @@ texture<float2, cudaTextureType2D, cudaReadModeElementType> epipolar_matches_tex

texture<float, cudaTextureType2D, cudaReadModeElementType> g_tex;

// Pre-computed template statistics
texture<float, cudaTextureType2D, cudaReadModeElementType> sum_templ_tex;
texture<float, cudaTextureType2D, cudaReadModeElementType> const_templ_denom_tex;

template<typename ElementType>
inline void bindTexture(
texture<ElementType, cudaTextureType2D> &tex,
Expand Down
54 changes: 26 additions & 28 deletions src/epipolar_match.cu
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,14 @@
namespace rmd
{

// Two-element integer array kept in device constant memory.
// It is filled from the host through copyImgSzXY2Const().
__constant__ int c_img_size_xy[2];

// Copies the two integers pointed to by h_img_size_xy (host memory) into
// the constant-memory symbol c_img_size_xy.
//
// NOTE(review): the cudaError_t returned by cudaMemcpyToSymbol is discarded,
// so a failed upload goes unnoticed here — consider reporting it to the caller.
extern "C" void copyImgSzXY2Const(int *h_img_size_xy)
{
  // sizeof the symbol spans both elements, i.e. 2*sizeof(int) bytes.
  cudaMemcpyToSymbol(c_img_size_xy, h_img_size_xy, sizeof(c_img_size_xy));
}

__global__
void seedEpipolarMatchKernel(
mvs::DeviceData *dev_ptr,
Expand All @@ -36,7 +44,7 @@ void seedEpipolarMatchKernel(
int x = blockIdx.x * blockDim.x + threadIdx.x;
int y = blockIdx.y * blockDim.y + threadIdx.y;

if(x >= dev_ptr->width || y >= dev_ptr->height)
if(x >= c_img_size_xy[0] || y >= c_img_size_xy[1])
return;

const float xx = x+0.5f;
Expand All @@ -59,15 +67,6 @@ void seedEpipolarMatchKernel(
const float2 px_mean_curr =
dev_ptr->cam.world2cam( T_curr_ref * (f_ref * mu) );

if( (px_mean_curr.x >= dev_ptr->width) ||
(px_mean_curr.y >= dev_ptr->height) ||
(px_mean_curr.x < 0) ||
(px_mean_curr.y < 0) )
{
dev_ptr->convergence->atXY(x, y) = ConvergenceStates::NOT_VISIBLE;
return;
}

const float2 px_min_curr =
dev_ptr->cam.world2cam( T_curr_ref * (f_ref * fmaxf( mu - 3.0f*sigma, 0.01f)) );
const float2 px_max_curr =
Expand All @@ -78,13 +77,10 @@ void seedEpipolarMatchKernel(
const float half_length = 0.5f * fminf(norm(epi_line), RMD_MAX_EXTENT_EPIPOLAR_SEARCH);
float2 px_curr, best_px_curr;

const int &side = dev_ptr->patch.side;
const int2 &offset = dev_ptr->patch.offset;
const float n = (float)side * (float)side;
// Retrieve template statistics for NCC matching
const float sum_templ = tex2D(sum_templ_tex, xx, yy);
const float const_templ_denom = tex2D(const_templ_denom_tex, xx, yy);

// Retrieve template statistics for NCC matching
const float sum_templ = dev_ptr->sum_templ->atXY(x, y);
const float const_templ_denom = dev_ptr->const_templ_denom->atXY(x, y);
// init best match score
float best_ncc = -1.0f;

Expand All @@ -94,10 +90,10 @@ void seedEpipolarMatchKernel(
for(float l = -half_length; l <= half_length; l += 0.7f)
{
px_curr = px_mean_curr + l*epi_dir;
if( (px_curr.x >= dev_ptr->width - dev_ptr->patch.side) ||
(px_curr.y >= dev_ptr->height - dev_ptr->patch.side) ||
(px_curr.x < dev_ptr->patch.side) ||
(px_curr.y < dev_ptr->patch.side) )
if( (px_curr.x >= c_img_size_xy[0] - RMD_CORR_PATCH_SIDE) ||
(px_curr.y >= c_img_size_xy[1] - RMD_CORR_PATCH_SIDE) ||
(px_curr.x < RMD_CORR_PATCH_SIDE) ||
(px_curr.y < RMD_CORR_PATCH_SIDE) )
{
continue;
}
Expand All @@ -106,25 +102,27 @@ void seedEpipolarMatchKernel(
sum_img_sq = 0.0f;
sum_img_templ = 0.0f;

for(int patch_y=0; patch_y<side; ++patch_y)
for(int patch_y=0; patch_y<RMD_CORR_PATCH_SIDE; ++patch_y)
{
for(int patch_x=0; patch_x<side; ++patch_x)
for(int patch_x=0; patch_x<RMD_CORR_PATCH_SIDE; ++patch_x)
{
const float templ = tex2D(
ref_img_tex,
px_ref.x+(float)(offset.x+patch_x)+0.5f,
px_ref.y+(float)(offset.y+patch_y)+0.5f);
px_ref.x+(float)(RMD_CORR_PATCH_OFFSET+patch_x)+0.5f,
px_ref.y+(float)(RMD_CORR_PATCH_OFFSET+patch_y)+0.5f);
const float img = tex2D(
curr_img_tex,
px_curr.x+(float)(offset.x+patch_x)+0.5f,
px_curr.y+(float)(offset.y+patch_y)+0.5f);
px_curr.x+(float)(RMD_CORR_PATCH_OFFSET+patch_x)+0.5f,
px_curr.y+(float)(RMD_CORR_PATCH_OFFSET+patch_y)+0.5f);
sum_img += img;
sum_img_sq += img*img;
sum_img_templ += img*templ;
}
}
const float ncc_numerator = n*sum_img_templ - sum_img*sum_templ;
const float ncc_denominator = (n*sum_img_sq - sum_img*sum_img)*const_templ_denom;
// Number of pixels in the square correlation patch (side * side).
// Parenthesized so the product expands as a single value regardless of the
// operators surrounding the macro (e.g. x / RMD_CORR_PATCH_AREA).
#define RMD_CORR_PATCH_AREA ((RMD_CORR_PATCH_SIDE)*(RMD_CORR_PATCH_SIDE))
const float ncc_numerator = RMD_CORR_PATCH_AREA*sum_img_templ - sum_img*sum_templ;
const float ncc_denominator = (RMD_CORR_PATCH_AREA*sum_img_sq - sum_img*sum_img)*const_templ_denom;
#undef RMD_CORR_PATCH_AREA
const float ncc = ncc_numerator * rsqrtf(ncc_denominator + FLT_MIN);

if(ncc > best_ncc)
Expand Down
7 changes: 7 additions & 0 deletions src/seed_matrix.cu
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
// along with this program. If not, see <http://www.gnu.org/licenses/>.

#include <rmd/seed_matrix.cuh>

#include <rmd/texture_memory.cuh>
#include <rmd/helper_vector_types.cuh>

Expand Down Expand Up @@ -58,6 +59,8 @@ rmd::SeedMatrix::SeedMatrix(
dev_data_.one_pix_angle = cam.getOnePixAngle();
dev_data_.width = width;
dev_data_.height = height;
host_img_size_xy_[0] = width_;
host_img_size_xy_[1] = height_;

// Kernel configuration for depth estimation
dim_block_.x = 16;
Expand Down Expand Up @@ -107,6 +110,9 @@ bool rmd::SeedMatrix::setReferenceImage(

rmd::seedInitKernel<<<dim_grid_, dim_block_>>>(dev_data_.dev_ptr);
cudaDeviceSynchronize();

rmd::bindTexture(sum_templ_tex, sum_templ_, cudaFilterModePoint);
rmd::bindTexture(const_templ_denom_tex, const_templ_denom_, cudaFilterModePoint);
return true;
}

Expand Down Expand Up @@ -136,6 +142,7 @@ bool rmd::SeedMatrix::update(

// Establish epipolar correspondences
// call epipolar matching kernel
rmd::copyImgSzXY2Const(host_img_size_xy_);
rmd::seedEpipolarMatchKernel<<<dim_grid_, dim_block_>>>(dev_data_.dev_ptr, T_curr_ref);
err = cudaDeviceSynchronize();
if(cudaSuccess != err)
Expand Down
3 changes: 3 additions & 0 deletions test/dataset_main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,10 @@ int main(int argc, char **argv)
}
else
{
double t = (double)cv::getTickCount();
depthmap.update(img, T_world_curr.inv());
t = ((double)cv::getTickCount() - t)/cv::getTickFrequency();
printf("\nUPDATE execution time: %f seconds.\n", t);
}
}

Expand Down

0 comments on commit e0668a6

Please sign in to comment.