Skip to content

Commit

Permalink
Merge pull request #32 from alicevision/multigauss
Browse files Browse the repository at this point in the history
Multigauss - various enhancements
  • Loading branch information
fabiencastan authored Jan 29, 2018
2 parents 2178992 + e658ced commit d8700a1
Show file tree
Hide file tree
Showing 40 changed files with 1,976 additions and 883 deletions.
54 changes: 44 additions & 10 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,10 @@ cmake_minimum_required(VERSION 3.4)
project(PopSift VERSION 1.0.0)

OPTION(PopSift_BUILD_EXAMPLES "Build PopSift applications." ON)
OPTION(USE_NVTX_PROFILING "Use CUDA NVTX for profiling." OFF)
OPTION(PopSift_USE_NVTX_PROFILING "Use CUDA NVTX for profiling." OFF)
OPTION(PopSift_ERRCHK_AFTER_KERNEL "Synchronize and check CUDA error after every kernel." OFF)
OPTION(PopSift_USE_POSITION_INDEPENDENT_CODE "Generate position independent code." ON)
OPTION(PopSift_USE_GRID_FILTER "Switch off grid filtering to massively reduce compile time while debugging other things." ON)

if(PopSift_USE_POSITION_INDEPENDENT_CODE)
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
Expand Down Expand Up @@ -43,9 +45,25 @@ if(NOT CUDA_FOUND)
message(FATAL_ERROR "Could not find CUDA >= 7.0")
endif()

if(USE_NVTX_PROFILING)
#
# Default set of CUDA compute capabilities (CCs) to compile for.
# Keeping this list short saves a lot of compile time.
# CCs 60, 61 and 62 are only added when the CUDA major version is above 7.
# The result is exposed as a cache entry so that it can be overridden.
#
set(PopSift_CUDA_CC_LIST_BASIC 30 35 50 52)
if(CUDA_VERSION_MAJOR GREATER 7)
  list(APPEND PopSift_CUDA_CC_LIST_BASIC 60 61 62)
endif()
set(PopSift_CUDA_CC_LIST "${PopSift_CUDA_CC_LIST_BASIC}" CACHE STRING "CUDA CC versions to compile")

if(PopSift_USE_NVTX_PROFILING)
message(STATUS "PROFILING CPU CODE: NVTX is in use")
endif(USE_NVTX_PROFILING)
endif(PopSift_USE_NVTX_PROFILING)

if(PopSift_ERRCHK_AFTER_KERNEL)
message(STATUS "Synchronizing and checking errors after every kernel call")
set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};-DERRCHK_AFTER_KERNEL")
endif(PopSift_ERRCHK_AFTER_KERNEL)

set(CUDA_SEPARABLE_COMPILATION ON)

Expand All @@ -70,11 +88,21 @@ if(PopSift_USE_POSITION_INDEPENDENT_CODE)
set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};-Xcompiler;-fPIC")
endif()

set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};-gencode;arch=compute_30,code=sm_30")
set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};-gencode;arch=compute_35,code=sm_35")
set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};-gencode;arch=compute_50,code=sm_50")
set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};-gencode;arch=compute_52,code=sm_52")
set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};-gencode;arch=compute_52,code=compute_52")
#
# Add all requested CUDA CCs to the command line for offline compilation.
# PopSift_CUDA_CC_LIST is a user-editable cache entry, so reject an empty
# list here with a clear message instead of failing later inside list(GET).
#
list(LENGTH PopSift_CUDA_CC_LIST PopSift_CC_LIST_LEN)
if(PopSift_CC_LIST_LEN EQUAL 0)
  message(FATAL_ERROR "PopSift_CUDA_CC_LIST is empty: specify at least one CUDA CC version (e.g. 52)")
endif()

list(SORT PopSift_CUDA_CC_LIST)
foreach(PopSift_CC_VERSION ${PopSift_CUDA_CC_LIST})
  list(APPEND CUDA_NVCC_FLAGS "-gencode" "arch=compute_${PopSift_CC_VERSION},code=sm_${PopSift_CC_VERSION}")
endforeach()

#
# Use the highest requested CUDA CC for CUDA JIT compilation.
# NOTE: list(SORT) orders the entries as strings, so the last entry is only
# the numerically highest one while all CC values have the same number of digits.
#
math(EXPR PopSift_CC_LIST_LEN "${PopSift_CC_LIST_LEN}-1")
list(GET PopSift_CUDA_CC_LIST ${PopSift_CC_LIST_LEN} PopSift_CUDA_CC_LIST_LAST)
list(APPEND CUDA_NVCC_FLAGS "-gencode" "arch=compute_${PopSift_CUDA_CC_LIST_LAST},code=compute_${PopSift_CUDA_CC_LIST_LAST}")

# default stream legacy implies that the 0 stream synchronizes all streams
# default stream per-thread implies that each host thread has one non-synchronizing 0-stream
Expand All @@ -83,6 +111,7 @@ set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};--default-stream;legacy")
# set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};--default-stream;per-thread")

message(STATUS "CUDA Version is ${CUDA_VERSION}")
message(STATUS "Compiling for CUDA CCs: ${PopSift_CUDA_CC_LIST}")
if(CUDA_VERSION>=7.5)
set(CUDA_NVCC_FLAGS_RELEASE "${CUDA_NVCC_FLAGS_RELEASE};-Xptxas;-warn-lmem-usage")
set(CUDA_NVCC_FLAGS_RELEASE "${CUDA_NVCC_FLAGS_RELEASE};-Xptxas;-warn-spills")
Expand All @@ -94,11 +123,16 @@ endif(CUDA_VERSION>=7.5)
# library required for CUDA dynamic parallelism, forgotten by CMake 3.4
cuda_find_library_local_first(CUDA_CUDADEVRT_LIBRARY cudadevrt "\"cudadevrt\" library")

if(USE_NVTX_PROFILING)
if(PopSift_USE_NVTX_PROFILING)
# library required for NVTX profiling of the CPU
cuda_find_library_local_first(CUDA_NVTX_LIBRARY nvToolsExt "NVTX library")
add_definitions(-DUSE_NVTX)
endif(USE_NVTX_PROFILING)
endif(PopSift_USE_NVTX_PROFILING)

# With PopSift_USE_GRID_FILTER switched off, pass DISABLE_GRID_FILTER to the
# compiler as a preprocessor definition and report the choice at configure time.
if(NOT PopSift_USE_GRID_FILTER)
  add_definitions(-DDISABLE_GRID_FILTER)
  message(STATUS "Disabling grid filter compilation")
endif()

add_subdirectory(src)

Expand Down
1 change: 1 addition & 0 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ CUDA_ADD_LIBRARY(popsift STATIC
popsift/sift_extremum.h
popsift/sift_extremum.cu popsift/s_extrema.cu
popsift/s_orientation.cu
popsift/s_filtergrid.cu
popsift/sift_desc.cu
popsift/s_desc_loop.cu popsift/s_desc_loop.h
popsift/s_desc_iloop.cu popsift/s_desc_iloop.h
Expand Down
4 changes: 2 additions & 2 deletions src/application/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,9 @@ else()
set(PD_COMPILE_OPTIONS "" )
endif()

if(USE_NVTX_PROFILING)
if(PopSift_USE_NVTX_PROFILING)
list(APPEND PD_LINK_LIBS ${CUDA_NVTX_LIBRARY})
endif(USE_NVTX_PROFILING)
endif(PopSift_USE_NVTX_PROFILING)

#############################################################
# popsift-demo
Expand Down
50 changes: 39 additions & 11 deletions src/application/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ static bool print_time_info = false;
static bool write_as_uchar = false;
static bool dont_write = false;
static bool pgmread_loading = false;
static bool float_mode = false;

static void parseargs(int argc, char** argv, popsift::Config& config, string& inputFile) {
using namespace boost::program_options;
Expand Down Expand Up @@ -73,9 +74,10 @@ static void parseargs(int argc, char** argv, popsift::Config& config, string& in
options_description modes("Modes");
{
modes.add_options()
("gauss-mode", value<std::string>()->notifier([&](const std::string& s) { config.setGaussMode(s); }),
"Choice of span (1-sided) for Gauss filters. Default is VLFeat-like computation depending on sigma. "
"Options are: vlfeat, relative, opencv, fixed9, fixed15")
( "gauss-mode", value<std::string>()->notifier([&](const std::string& s) { config.setGaussMode(s); }),
popsift::Config::getGaussModeUsage() )
// "Choice of span (1-sided) for Gauss filters. Default is VLFeat-like computation depending on sigma. "
// "Options are: vlfeat, relative, relative-all, opencv, fixed9, fixed15"
("desc-mode", value<std::string>()->notifier([&](const std::string& s) { config.setDescMode(s); }),
"Choice of descriptor extraction modes:\n"
"loop, iloop, grid, igrid, notile\n"
Expand All @@ -97,9 +99,11 @@ static void parseargs(int argc, char** argv, popsift::Config& config, string& in
"Computed filter width are lower than VLFeat/PopSift")
("direct-scaling", bool_switch()->notifier([&](bool b) { if(b) config.setScalingMode(popsift::Config::ScaleDirect); }),
"Direct each octave from upscaled orig instead of blurred level.")
("root-sift", bool_switch()->notifier([&](bool b) { if(b) config.setUseRootSift(true); }),
"Use the L1-based norm for OpenMVG rather than L2-based as in OpenCV")
("norm-multi", value<int>()->notifier([&](int i) {config.setNormalizationMultiplier(i); }), "Multiply the descriptor by pow(2,<int>).")
( "norm-mode", value<std::string>()->notifier([&](const std::string& s) { config.setNormMode(s); }),
popsift::Config::getNormModeUsage() )
( "root-sift", bool_switch()->notifier([&](bool b) { if(b) config.setNormMode(popsift::Config::RootSift); }),
popsift::Config::getNormModeUsage() )
("filter-max-extrema", value<int>()->notifier([&](int f) {config.setFilterMaxExtrema(f); }), "Approximate max number of extrema.")
("filter-grid", value<int>()->notifier([&](int f) {config.setFilterGridSize(f); }), "Grid edge length for extrema filtering (ie. value 4 leads to a 4x4 grid)")
("filter-sort", value<std::string>()->notifier([&](const std::string& s) {config.setFilterSorting(s); }), "Sort extrema in each cell by scale, either random (default), up or down");
Expand All @@ -115,6 +119,7 @@ static void parseargs(int argc, char** argv, popsift::Config& config, string& in
"Scaling to sensible ranges is not automatic, should be combined with --norm-multi=9 or similar")
("dont-write", bool_switch(&dont_write)->default_value(false), "Suppress descriptor output")
("pgmread-loading", bool_switch(&pgmread_loading)->default_value(false), "Use the old image loader instead of LibDevIL")
("float-mode", bool_switch(&float_mode)->default_value(false), "Upload image to GPU as float instead of byte")
;

//("test-direct-scaling")
Expand Down Expand Up @@ -164,12 +169,18 @@ SiftJob* process_image( const string& inputFile, PopSift& PopSift )
{
int w;
int h;
unsigned char* image_data;
SiftJob* job;
unsigned char* image_data;

#ifdef USE_DEVIL
if( not pgmread_loading )
{
if( float_mode )
{
cerr << "Cannot combine float-mode test with DevIL image reader" << endl;
exit( -1 );
}

nvtxRangePushA( "load and convert image - devil" );

ilImage img;
Expand All @@ -184,11 +195,11 @@ SiftJob* process_image( const string& inputFile, PopSift& PopSift )
w = img.Width();
h = img.Height();
cout << "Loading " << w << " x " << h << " image " << inputFile << endl;

image_data = img.GetData();

nvtxRangePop( ); // "load and convert image - devil"

// PopSift.init( w, h );
job = PopSift.enqueue( w, h, image_data );

img.Clear();
Expand All @@ -205,10 +216,25 @@ SiftJob* process_image( const string& inputFile, PopSift& PopSift )

nvtxRangePop( ); // "load and convert image - pgmread"

// PopSift.init( w, h );
job = PopSift.enqueue( w, h, image_data );
if( not float_mode )
{
// PopSift.init( w, h );
job = PopSift.enqueue( w, h, image_data );

delete [] image_data;
delete [] image_data;
}
else
{
float* f_image_data = new float [w * h];
for( int i=0; i<w*h; i++ )
{
f_image_data[i] = float( image_data[i] ) / 256.0f;
}
job = PopSift.enqueue( w, h, f_image_data );

delete [] image_data;
delete [] f_image_data;
}
}

return job;
Expand Down Expand Up @@ -272,7 +298,9 @@ int main(int argc, char **argv)
deviceInfo.set( 0, print_dev_info );
if( print_dev_info ) deviceInfo.print( );

PopSift PopSift( config );
PopSift PopSift( config,
popsift::Config::ExtractingMode,
float_mode ? PopSift::FloatImages : PopSift::ByteImages );

std::queue<SiftJob*> jobs;
for( auto it = inputFiles.begin(); it!=inputFiles.end(); it++ ) {
Expand Down
11 changes: 6 additions & 5 deletions src/application/match.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -74,9 +74,8 @@ static void parseargs(int argc, char** argv, popsift::Config& config, string& lF
options_description modes("Modes");
{
modes.add_options()
("gauss-mode", value<std::string>()->notifier([&](const std::string& s) { config.setGaussMode(s); }),
"Choice of span (1-sided) for Gauss filters. Default is VLFeat-like computation depending on sigma. "
"Options are: vlfeat, relative, opencv, fixed9, fixed15")
( "gauss-mode", value<std::string>()->notifier([&](const std::string& s) { config.setGaussMode(s); }),
popsift::Config::getGaussModeUsage() )
("desc-mode", value<std::string>()->notifier([&](const std::string& s) { config.setDescMode(s); }),
"Choice of descriptor extraction modes:\n"
"loop, iloop, grid, igrid, notile\n"
Expand All @@ -98,9 +97,11 @@ static void parseargs(int argc, char** argv, popsift::Config& config, string& lF
"Computed filter width are lower than VLFeat/PopSift")
("direct-scaling", bool_switch()->notifier([&](bool b) { if(b) config.setScalingMode(popsift::Config::ScaleDirect); }),
"Direct each octave from upscaled orig instead of blurred level.")
("root-sift", bool_switch()->notifier([&](bool b) { if(b) config.setUseRootSift(true); }),
"Use the L1-based norm for OpenMVG rather than L2-based as in OpenCV")
("norm-multi", value<int>()->notifier([&](int i) {config.setNormalizationMultiplier(i); }), "Multiply the descriptor by pow(2,<int>).")
( "norm-mode", value<std::string>()->notifier([&](const std::string& s) { config.setNormMode(s); }),
popsift::Config::getNormModeUsage() )
( "root-sift", bool_switch()->notifier([&](bool b) { if(b) config.setNormMode(popsift::Config::RootSift); }),
popsift::Config::getNormModeUsage() )
("filter-max-extrema", value<int>()->notifier([&](int f) {config.setFilterMaxExtrema(f); }), "Approximate max number of extrema.")
("filter-grid", value<int>()->notifier([&](int f) {config.setFilterGridSize(f); }), "Grid edge length for extrema filtering (ie. value 4 leads to a 4x4 grid)")
("filter-sort", value<std::string>()->notifier([&](const std::string& s) {config.setFilterSorting(s); }), "Sort extrema in each cell by scale, either random (default), up or down");
Expand Down
39 changes: 38 additions & 1 deletion src/application/pgmread.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -62,10 +62,11 @@ unsigned char* readPGMfile( const string& filename, int& w, int& h )

int type;
if( pgmtype.substr(0,2) == "P2" ) type = 2;
else if( pgmtype.substr(0,2) == "P3" ) type = 3;
else if( pgmtype.substr(0,2) == "P5" ) type = 5;
else if( pgmtype.substr(0,2) == "P6" ) type = 6;
else {
cerr << "File " << input_file << " can only contain P2, P5 or P6 PGM images" << endl;
cerr << "File " << input_file << " can only contain P2, P3, P5 or P6 PGM images" << endl;
return 0;
}

Expand Down Expand Up @@ -139,6 +140,42 @@ unsigned char* readPGMfile( const string& filename, int& w, int& h )
}
}
break;
case 3 :
{
unsigned char* i2 = new unsigned char[ w * h * 3 ];
unsigned char* src = i2;
for( int i=0; i<w*h*3; i++ ) {
int input;
pgmfile >> input;
if( maxval == 255 ) {
i2[i] = input;
} else {
i2[i] = (unsigned char)(input * 255.0 / maxval );
}
if( pgmfile.fail() ) {
cerr << "File " << input_file << " file too short" << endl;
delete [] i2;
delete [] input_data;
return 0;
}
}
for( int i=0; i<w*h; i++ ) {
#ifdef RGB2GRAY_IN_INT
unsigned int r = *src; src++;
unsigned int g = *src; src++;
unsigned int b = *src; src++;
unsigned int res = ( ( R_RATE*r+G_RATE*g+B_RATE*b ) >> RATE_SHIFT );
input_data[i] = (unsigned char)res;
#else // RGB2GRAY_IN_INT
float r = *src; src++;
float g = *src; src++;
float b = *src; src++;
input_data[i] = (unsigned char)( R_RATE*r+G_RATE*g+B_RATE*b );
#endif // RGB2GRAY_IN_INT
}
delete [] i2;
}
break;
case 5 :
if( maxval < 256 ) {
pgmfile.read( (char*)input_data, w*h );
Expand Down
6 changes: 6 additions & 0 deletions src/popsift/common/debug_macros.cu
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,12 @@

using namespace std;

// Synchronize with the device, then check for a CUDA error.
//   file, line: source location of the call site; forwarded unchanged to
//               pop_check_last_error.
void pop_sync_check_last_error( const char* file, size_t line )
{
// Synchronize first so the check below runs only after the device has
// finished; the return value of the synchronize call itself is not inspected.
cudaDeviceSynchronize();
pop_check_last_error( file, line );
}

void pop_check_last_error( const char* file, size_t line )
{
cudaError_t err = cudaGetLastError( );
Expand Down
14 changes: 12 additions & 2 deletions src/popsift/common/debug_macros.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,20 @@
#include <assert.h>
#include <cuda_runtime.h>

void pop_check_last_error( const char* file,
size_t line );
// synchronize device and check for an error
void pop_sync_check_last_error( const char* file, size_t line );

// check for an error without synchronizing first
void pop_check_last_error( const char* file, size_t line );

#define POP_CHK pop_check_last_error( __FILE__, __LINE__ )

// POP_SYNC_CHK: optional error check for use after a kernel launch.
// With ERRCHK_AFTER_KERNEL defined it calls pop_sync_check_last_error with the
// current file and line; without it the macro expands to nothing, so a
// statement "POP_SYNC_CHK;" is reduced to an empty statement.
#ifdef ERRCHK_AFTER_KERNEL
#define POP_SYNC_CHK pop_sync_check_last_error( __FILE__, __LINE__ )
#else
#define POP_SYNC_CHK
#endif

namespace popsift {
namespace cuda {
void malloc_dev( void** ptr, int sz,
Expand Down
3 changes: 3 additions & 0 deletions src/popsift/features.cu
Original file line number Diff line number Diff line change
Expand Up @@ -293,6 +293,8 @@ void FeaturesDev::match( FeaturesDev* other )
<<<grid,block>>>
( match_matrix, getDescriptors(), l_len, other->getDescriptors(), r_len );

POP_SYNC_CHK;

show_distance
<<<1,32>>>
( match_matrix,
Expand All @@ -305,6 +307,7 @@ void FeaturesDev::match( FeaturesDev* other )
other->getReverseMap(),
r_len );

POP_SYNC_CHK;

cudaFree( match_matrix );
}
Expand Down
Loading

0 comments on commit d8700a1

Please sign in to comment.