Merge pull request #32 from alicevision/multigauss

Multigauss - various enhancements
alicevision · Jan 29, 2018 · d8700a1 · d8700a1
2 parents 2178992 + e658ced
commit d8700a1
Show file tree

Hide file tree

Showing 40 changed files with 1,976 additions and 883 deletions.
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -4,8 +4,10 @@ cmake_minimum_required(VERSION 3.4)
 project(PopSift VERSION 1.0.0)
 
 OPTION(PopSift_BUILD_EXAMPLES "Build PopSift applications."  ON)
-OPTION(USE_NVTX_PROFILING     "Use CUDA NVTX for profiling." OFF)
+OPTION(PopSift_USE_NVTX_PROFILING     "Use CUDA NVTX for profiling." OFF)
+OPTION(PopSift_ERRCHK_AFTER_KERNEL     "Synchronize and check CUDA error after every kernel." OFF)
 OPTION(PopSift_USE_POSITION_INDEPENDENT_CODE "Generate position independent code." ON)
+OPTION(PopSift_USE_GRID_FILTER "Switch off grid filtering to massively reduce compile time while debugging other things." ON)
 
 if(PopSift_USE_POSITION_INDEPENDENT_CODE)
   set(CMAKE_POSITION_INDEPENDENT_CODE ON)
@@ -43,9 +45,25 @@ if(NOT CUDA_FOUND)
   message(FATAL_ERROR "Could not find CUDA >= 7.0")
 endif()
 
-if(USE_NVTX_PROFILING)
+#
+# Default setting of the CUDA CC versions to compile.
+# Shortening the lists saves a lot of compile time.
+#
+if(CUDA_VERSION_MAJOR GREATER 7)
+  set(PopSift_CUDA_CC_LIST_BASIC 30 35 50 52 60 61 62)
+else(CUDA_VERSION_MAJOR GREATER 7)
+  set(PopSift_CUDA_CC_LIST_BASIC 30 35 50 52 )
+endif(CUDA_VERSION_MAJOR GREATER 7)
+set(PopSift_CUDA_CC_LIST ${PopSift_CUDA_CC_LIST_BASIC} CACHE STRING "CUDA CC versions to compile")
+
+if(PopSift_USE_NVTX_PROFILING)
   message(STATUS "PROFILING CPU CODE: NVTX is in use")
-endif(USE_NVTX_PROFILING)
+endif(PopSift_USE_NVTX_PROFILING)
+
+if(PopSift_ERRCHK_AFTER_KERNEL)
+  message(STATUS "Synchronizing and checking errors after every kernel call")
+  set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};-DERRCHK_AFTER_KERNEL")
+endif(PopSift_ERRCHK_AFTER_KERNEL)
 
 set(CUDA_SEPARABLE_COMPILATION ON)
 
@@ -70,11 +88,21 @@ if(PopSift_USE_POSITION_INDEPENDENT_CODE)
   set(CUDA_NVCC_FLAGS         "${CUDA_NVCC_FLAGS};-Xcompiler;-fPIC")
 endif()
 
-set(CUDA_NVCC_FLAGS         "${CUDA_NVCC_FLAGS};-gencode;arch=compute_30,code=sm_30")
-set(CUDA_NVCC_FLAGS         "${CUDA_NVCC_FLAGS};-gencode;arch=compute_35,code=sm_35")
-set(CUDA_NVCC_FLAGS         "${CUDA_NVCC_FLAGS};-gencode;arch=compute_50,code=sm_50")
-set(CUDA_NVCC_FLAGS         "${CUDA_NVCC_FLAGS};-gencode;arch=compute_52,code=sm_52")
-set(CUDA_NVCC_FLAGS         "${CUDA_NVCC_FLAGS};-gencode;arch=compute_52,code=compute_52")
+#
+# Add all requested CUDA CCs to the command line for offline compilation
+#
+list(SORT PopSift_CUDA_CC_LIST)
+foreach(PopSift_CC_VERSION ${PopSift_CUDA_CC_LIST})
+  set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};-gencode;arch=compute_${PopSift_CC_VERSION},code=sm_${PopSift_CC_VERSION}")
+endforeach(PopSift_CC_VERSION)
+
+#
+# Use the highest requested CUDA CC for CUDA JIT compilation
+#
+list(LENGTH PopSift_CUDA_CC_LIST PopSift_CC_LIST_LEN)
+MATH(EXPR PopSift_CC_LIST_LEN "${PopSift_CC_LIST_LEN}-1")
+list(GET PopSift_CUDA_CC_LIST ${PopSift_CC_LIST_LEN} PopSift_CUDA_CC_LIST_LAST)
+set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};-gencode;arch=compute_${PopSift_CUDA_CC_LIST_LAST},code=compute_${PopSift_CUDA_CC_LIST_LAST}")
 
 # default stream legacy implies that the 0 stream synchronizes all streams
 # default stream per-thread implies that each host thread has one non-synchronizing 0-stream
@@ -83,6 +111,7 @@ set(CUDA_NVCC_FLAGS         "${CUDA_NVCC_FLAGS};--default-stream;legacy")
 # set(CUDA_NVCC_FLAGS         "${CUDA_NVCC_FLAGS};--default-stream;per-thread")
 
 message(STATUS "CUDA Version is ${CUDA_VERSION}")
+message(STATUS "Compiling for CUDA CCs: ${PopSift_CUDA_CC_LIST}")
 if(CUDA_VERSION>=7.5)
   set(CUDA_NVCC_FLAGS_RELEASE "${CUDA_NVCC_FLAGS_RELEASE};-Xptxas;-warn-lmem-usage")
   set(CUDA_NVCC_FLAGS_RELEASE "${CUDA_NVCC_FLAGS_RELEASE};-Xptxas;-warn-spills")
@@ -94,11 +123,16 @@ endif(CUDA_VERSION>=7.5)
 # library required for CUDA dynamic parallelism, forgotten by CMake 3.4
 cuda_find_library_local_first(CUDA_CUDADEVRT_LIBRARY cudadevrt "\"cudadevrt\" library")
 
-if(USE_NVTX_PROFILING)
+if(PopSift_USE_NVTX_PROFILING)
   # library required for NVTX profiling of the CPU
   cuda_find_library_local_first(CUDA_NVTX_LIBRARY nvToolsExt "NVTX library")
   add_definitions(-DUSE_NVTX)
-endif(USE_NVTX_PROFILING)
+endif(PopSift_USE_NVTX_PROFILING)
+
+if(NOT PopSift_USE_GRID_FILTER)
+  message(STATUS "Disabling grid filter compilation")
+  add_definitions(-DDISABLE_GRID_FILTER)
+endif(NOT PopSift_USE_GRID_FILTER)
 
 add_subdirectory(src)
 

diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
@@ -19,6 +19,7 @@ CUDA_ADD_LIBRARY(popsift STATIC
 	popsift/sift_extremum.h
 	popsift/sift_extremum.cu popsift/s_extrema.cu
 	popsift/s_orientation.cu
+        popsift/s_filtergrid.cu
 	popsift/sift_desc.cu
 	popsift/s_desc_loop.cu popsift/s_desc_loop.h
 	popsift/s_desc_iloop.cu popsift/s_desc_iloop.h

diff --git a/src/application/CMakeLists.txt b/src/application/CMakeLists.txt
@@ -28,9 +28,9 @@ else()
   set(PD_COMPILE_OPTIONS "" )
 endif()
 
-if(USE_NVTX_PROFILING)
+if(PopSift_USE_NVTX_PROFILING)
   list(APPEND PD_LINK_LIBS ${CUDA_NVTX_LIBRARY})
-endif(USE_NVTX_PROFILING)
+endif(PopSift_USE_NVTX_PROFILING)
 
 #############################################################
 # popsift-demo

diff --git a/src/application/main.cpp b/src/application/main.cpp
@@ -43,6 +43,7 @@ static bool print_time_info = false;
 static bool write_as_uchar  = false;
 static bool dont_write      = false;
 static bool pgmread_loading = false;
+static bool float_mode      = false;
 
 static void parseargs(int argc, char** argv, popsift::Config& config, string& inputFile) {
     using namespace boost::program_options;
@@ -73,9 +74,10 @@ static void parseargs(int argc, char** argv, popsift::Config& config, string& in
     options_description modes("Modes");
     {
     modes.add_options()
-        ("gauss-mode", value<std::string>()->notifier([&](const std::string& s) { config.setGaussMode(s); }),
-        "Choice of span (1-sided) for Gauss filters. Default is VLFeat-like computation depending on sigma. "
-        "Options are: vlfeat, relative, opencv, fixed9, fixed15")
+        ( "gauss-mode", value<std::string>()->notifier([&](const std::string& s) { config.setGaussMode(s); }),
+          popsift::Config::getGaussModeUsage() )
+        // "Choice of span (1-sided) for Gauss filters. Default is VLFeat-like computation depending on sigma. "
+        // "Options are: vlfeat, relative, relative-all, opencv, fixed9, fixed15"
         ("desc-mode", value<std::string>()->notifier([&](const std::string& s) { config.setDescMode(s); }),
         "Choice of descriptor extraction modes:\n"
         "loop, iloop, grid, igrid, notile\n"
@@ -97,9 +99,11 @@ static void parseargs(int argc, char** argv, popsift::Config& config, string& in
         "Computed filter width are lower than VLFeat/PopSift")
         ("direct-scaling", bool_switch()->notifier([&](bool b) { if(b) config.setScalingMode(popsift::Config::ScaleDirect); }),
          "Direct each octave from upscaled orig instead of blurred level.")
-        ("root-sift", bool_switch()->notifier([&](bool b) { if(b) config.setUseRootSift(true); }),
-        "Use the L1-based norm for OpenMVG rather than L2-based as in OpenCV")
         ("norm-multi", value<int>()->notifier([&](int i) {config.setNormalizationMultiplier(i); }), "Multiply the descriptor by pow(2,<int>).")
+        ( "norm-mode", value<std::string>()->notifier([&](const std::string& s) { config.setNormMode(s); }),
+          popsift::Config::getNormModeUsage() )
+        ( "root-sift", bool_switch()->notifier([&](bool b) { if(b) config.setNormMode(popsift::Config::RootSift); }),
+          popsift::Config::getNormModeUsage() )
         ("filter-max-extrema", value<int>()->notifier([&](int f) {config.setFilterMaxExtrema(f); }), "Approximate max number of extrema.")
         ("filter-grid", value<int>()->notifier([&](int f) {config.setFilterGridSize(f); }), "Grid edge length for extrema filtering (ie. value 4 leads to a 4x4 grid)")
         ("filter-sort", value<std::string>()->notifier([&](const std::string& s) {config.setFilterSorting(s); }), "Sort extrema in each cell by scale, either random (default), up or down");
@@ -115,6 +119,7 @@ static void parseargs(int argc, char** argv, popsift::Config& config, string& in
          "Scaling to sensible ranges is not automatic, should be combined with --norm-multi=9 or similar")
         ("dont-write", bool_switch(&dont_write)->default_value(false), "Suppress descriptor output")
         ("pgmread-loading", bool_switch(&pgmread_loading)->default_value(false), "Use the old image loader instead of LibDevIL")
+        ("float-mode", bool_switch(&float_mode)->default_value(false), "Upload image to GPU as float instead of byte")
         ;
 
         //("test-direct-scaling")
@@ -164,12 +169,18 @@ SiftJob* process_image( const string& inputFile, PopSift& PopSift )
 {
     int w;
     int h;
-    unsigned char* image_data;
     SiftJob* job;
+    unsigned char* image_data;
 
 #ifdef USE_DEVIL
     if( not pgmread_loading )
     {
+        if( float_mode )
+        {
+            cerr << "Cannot combine float-mode test with DevIL image reader" << endl;
+            exit( -1 );
+        }
+
         nvtxRangePushA( "load and convert image - devil" );
 
         ilImage img;
@@ -184,11 +195,11 @@ SiftJob* process_image( const string& inputFile, PopSift& PopSift )
         w = img.Width();
         h = img.Height();
         cout << "Loading " << w << " x " << h << " image " << inputFile << endl;
+
         image_data = img.GetData();
 
         nvtxRangePop( ); // "load and convert image - devil"
 
-        // PopSift.init( w, h );
         job = PopSift.enqueue( w, h, image_data );
 
         img.Clear();
@@ -205,10 +216,25 @@ SiftJob* process_image( const string& inputFile, PopSift& PopSift )
 
         nvtxRangePop( ); // "load and convert image - pgmread"
 
-        // PopSift.init( w, h );
-        job = PopSift.enqueue( w, h, image_data );
+        if( not float_mode )
+        {
+            // PopSift.init( w, h );
+            job = PopSift.enqueue( w, h, image_data );
 
-        delete [] image_data;
+            delete [] image_data;
+        }
+        else
+        {
+            float* f_image_data = new float [w * h];
+            for( int i=0; i<w*h; i++ )
+            {
+                f_image_data[i] = float( image_data[i] ) / 256.0f;
+            }
+            job = PopSift.enqueue( w, h, f_image_data );
+
+            delete [] image_data;
+            delete [] f_image_data;
+        }
     }
 
     return job;
@@ -272,7 +298,9 @@ int main(int argc, char **argv)
     deviceInfo.set( 0, print_dev_info );
     if( print_dev_info ) deviceInfo.print( );
 
-    PopSift PopSift( config );
+    PopSift PopSift( config,
+                     popsift::Config::ExtractingMode,
+                     float_mode ? PopSift::FloatImages : PopSift::ByteImages );
 
     std::queue<SiftJob*> jobs;
     for( auto it = inputFiles.begin(); it!=inputFiles.end(); it++ ) {

diff --git a/src/application/match.cpp b/src/application/match.cpp
@@ -74,9 +74,8 @@ static void parseargs(int argc, char** argv, popsift::Config& config, string& lF
     options_description modes("Modes");
     {
     modes.add_options()
-        ("gauss-mode", value<std::string>()->notifier([&](const std::string& s) { config.setGaussMode(s); }),
-        "Choice of span (1-sided) for Gauss filters. Default is VLFeat-like computation depending on sigma. "
-        "Options are: vlfeat, relative, opencv, fixed9, fixed15")
+        ( "gauss-mode", value<std::string>()->notifier([&](const std::string& s) { config.setGaussMode(s); }),
+          popsift::Config::getGaussModeUsage() )
         ("desc-mode", value<std::string>()->notifier([&](const std::string& s) { config.setDescMode(s); }),
         "Choice of descriptor extraction modes:\n"
         "loop, iloop, grid, igrid, notile\n"
@@ -98,9 +97,11 @@ static void parseargs(int argc, char** argv, popsift::Config& config, string& lF
         "Computed filter width are lower than VLFeat/PopSift")
         ("direct-scaling", bool_switch()->notifier([&](bool b) { if(b) config.setScalingMode(popsift::Config::ScaleDirect); }),
          "Direct each octave from upscaled orig instead of blurred level.")
-        ("root-sift", bool_switch()->notifier([&](bool b) { if(b) config.setUseRootSift(true); }),
-        "Use the L1-based norm for OpenMVG rather than L2-based as in OpenCV")
         ("norm-multi", value<int>()->notifier([&](int i) {config.setNormalizationMultiplier(i); }), "Multiply the descriptor by pow(2,<int>).")
+        ( "norm-mode", value<std::string>()->notifier([&](const std::string& s) { config.setNormMode(s); }),
+          popsift::Config::getNormModeUsage() )
+        ( "root-sift", bool_switch()->notifier([&](bool b) { if(b) config.setNormMode(popsift::Config::RootSift); }),
+          popsift::Config::getNormModeUsage() )
         ("filter-max-extrema", value<int>()->notifier([&](int f) {config.setFilterMaxExtrema(f); }), "Approximate max number of extrema.")
         ("filter-grid", value<int>()->notifier([&](int f) {config.setFilterGridSize(f); }), "Grid edge length for extrema filtering (ie. value 4 leads to a 4x4 grid)")
         ("filter-sort", value<std::string>()->notifier([&](const std::string& s) {config.setFilterSorting(s); }), "Sort extrema in each cell by scale, either random (default), up or down");

diff --git a/src/application/pgmread.cpp b/src/application/pgmread.cpp
@@ -62,10 +62,11 @@ unsigned char* readPGMfile( const string& filename, int& w, int& h )
 
     int type;
     if( pgmtype.substr(0,2) == "P2" ) type = 2;
+    else if( pgmtype.substr(0,2) == "P3" ) type = 3;
     else if( pgmtype.substr(0,2) == "P5" ) type = 5;
     else if( pgmtype.substr(0,2) == "P6" ) type = 6;
     else {
-        cerr << "File " << input_file << " can only contain P2, P5 or P6 PGM images" << endl;
+        cerr << "File " << input_file << " can only contain P2, P3, P5 or P6 PGM images" << endl;
         return 0;
     }
 
@@ -139,6 +140,42 @@ unsigned char* readPGMfile( const string& filename, int& w, int& h )
             }
         }
         break;
+    case 3 :
+        {
+            unsigned char* i2 = new unsigned char[ w * h * 3 ];
+            unsigned char* src = i2;
+            for( int i=0; i<w*h*3; i++ ) {
+                int input;
+                pgmfile >> input;
+                if( maxval == 255 ) {
+                    i2[i] = input;
+                } else {
+                    i2[i] = (unsigned char)(input * 255.0 / maxval );
+                }
+                if( pgmfile.fail() ) {
+                    cerr << "File " << input_file << " file too short" << endl;
+                    delete [] i2;
+                    delete [] input_data;
+                    return 0;
+                }
+            }
+            for( int i=0; i<w*h; i++ ) {
+#ifdef RGB2GRAY_IN_INT
+                unsigned int r = *src; src++;
+                unsigned int g = *src; src++;
+                unsigned int b = *src; src++;
+                unsigned int res = ( ( R_RATE*r+G_RATE*g+B_RATE*b ) >> RATE_SHIFT );
+                input_data[i] = (unsigned char)res;
+#else // RGB2GRAY_IN_INT
+                float r = *src; src++;
+                float g = *src; src++;
+                float b = *src; src++;
+                input_data[i] = (unsigned char)( R_RATE*r+G_RATE*g+B_RATE*b );
+#endif // RGB2GRAY_IN_INT
+            }
+            delete [] i2;
+        }
+        break;
     case 5 :
         if( maxval < 256 ) {
             pgmfile.read( (char*)input_data, w*h );

diff --git a/src/popsift/common/debug_macros.cu b/src/popsift/common/debug_macros.cu
@@ -11,6 +11,12 @@
 
 using namespace std;
 
+void pop_sync_check_last_error( const char* file, size_t line )
+{
+    cudaDeviceSynchronize();
+    pop_check_last_error( file, line );
+}
+
 void pop_check_last_error( const char* file, size_t line )
 {
     cudaError_t err = cudaGetLastError( );

diff --git a/src/popsift/common/debug_macros.h b/src/popsift/common/debug_macros.h
@@ -14,10 +14,20 @@
 #include <assert.h>
 #include <cuda_runtime.h>
 
-void pop_check_last_error( const char* file,
-                           size_t      line );
+// synchronize device and check for an error
+void pop_sync_check_last_error( const char* file, size_t line );
+
+// check for an error without synchronizing first
+void pop_check_last_error( const char* file, size_t      line );
+
 #define POP_CHK pop_check_last_error( __FILE__, __LINE__ )
 
+#ifdef ERRCHK_AFTER_KERNEL
+#define POP_SYNC_CHK pop_sync_check_last_error( __FILE__, __LINE__ )
+#else
+#define POP_SYNC_CHK
+#endif
+
 namespace popsift {
 namespace cuda {
 void malloc_dev( void** ptr, int sz,

diff --git a/src/popsift/features.cu b/src/popsift/features.cu
@@ -293,6 +293,8 @@ void FeaturesDev::match( FeaturesDev* other )
         <<<grid,block>>>
         ( match_matrix, getDescriptors(), l_len, other->getDescriptors(), r_len );
 
+    POP_SYNC_CHK;
+
     show_distance
         <<<1,32>>>
         ( match_matrix,
@@ -305,6 +307,7 @@ void FeaturesDev::match( FeaturesDev* other )
           other->getReverseMap(),
           r_len );
 
+    POP_SYNC_CHK;
 
     cudaFree( match_matrix );
 }