diff --git a/CMakeLists.txt b/CMakeLists.txt index a9c033cb..2b46a58b 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -18,7 +18,6 @@ set(CMAKE_LIBRARY_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/${CMAKE_SYSTEM_NAME}-$ option(PopSift_BUILD_EXAMPLES "Build PopSift applications." ON) option(PopSift_BUILD_DOCS "Build PopSift documentation." OFF) -option(PopSift_USE_NVTX_PROFILING "Use CUDA NVTX for profiling." OFF) option(PopSift_ERRCHK_AFTER_KERNEL "Synchronize and check CUDA error after every kernel." OFF) option(PopSift_USE_POSITION_INDEPENDENT_CODE "Generate position independent code." ON) option(PopSift_USE_GRID_FILTER "Switch off grid filtering to massively reduce compile time while debugging other things." ON) @@ -90,10 +89,6 @@ find_package(CUDAToolkit) message(STATUS "CUDA Version is ${CUDAToolkit_VERSION}") set(CUDA_VERSION ${CUDAToolkit_VERSION}) -if(PopSift_USE_NVTX_PROFILING) - message(STATUS "PROFILING CPU CODE: NVTX is in use") -endif() - if(PopSift_ERRCHK_AFTER_KERNEL) message(STATUS "Synchronizing and checking errors after every kernel call") list(APPEND CUDA_NVCC_FLAGS "-DERRCHK_AFTER_KERNEL") @@ -150,13 +145,6 @@ else() set(DISABLE_GRID_FILTER 0) endif() -if(PopSift_USE_NVTX_PROFILING) - # library required for NVTX profiling of the CPU - set(PopSift_USE_NVTX 1) -else() - set(PopSift_USE_NVTX 0) -endif() - add_subdirectory(src) if(PopSift_BUILD_DOCS) @@ -194,7 +182,6 @@ message(STATUS "Build Shared libs: " ${BUILD_SHARED_LIBS}) message(STATUS "Build examples: " ${PopSift_BUILD_EXAMPLES}) message(STATUS "Build documentation: " ${PopSift_BUILD_DOCS}) message(STATUS "Generate position independent code: " ${CMAKE_POSITION_INDEPENDENT_CODE}) -message(STATUS "Use CUDA NVTX for profiling: " ${PopSift_USE_NVTX_PROFILING}) message(STATUS "Synchronize and check CUDA error after every kernel: " ${PopSift_ERRCHK_AFTER_KERNEL}) message(STATUS "Grid filtering: " ${PopSift_USE_GRID_FILTER}) message(STATUS "Additional warning for CUDA nvcc: " ${PopSift_NVCC_WARNINGS}) diff --git a/appveyor.yml b/appveyor.yml index 679ac5b5..2f01c648 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -44,7 +44,7 @@ install: before_build: - md build - cd build - - cmake -G "Visual Studio 17 2022" -A x64 -T v143,host=x64,cuda="%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.5" -DBUILD_SHARED_LIBS:BOOL=ON -DPopSift_USE_NVTX_PROFILING:BOOL=OFF -DPopSift_USE_GRID_FILTER:BOOL=OFF -DPopSift_BUILD_DOCS:BOOL=OFF -DPopSift_USE_POSITION_INDEPENDENT_CODE:BOOL=ON -DPopSift_BUILD_EXAMPLES:BOOL=ON -DCMAKE_BUILD_TYPE=%configuration% -DCMAKE_TOOLCHAIN_FILE=c:/tools/vcpkg/scripts/buildsystems/vcpkg.cmake .. + - cmake -G "Visual Studio 17 2022" -A x64 -T v143,host=x64,cuda="%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.5" -DBUILD_SHARED_LIBS:BOOL=ON -DPopSift_USE_GRID_FILTER:BOOL=OFF -DPopSift_BUILD_DOCS:BOOL=OFF -DPopSift_USE_POSITION_INDEPENDENT_CODE:BOOL=ON -DPopSift_BUILD_EXAMPLES:BOOL=ON -DCMAKE_BUILD_TYPE=%configuration% -DCMAKE_TOOLCHAIN_FILE=c:/tools/vcpkg/scripts/buildsystems/vcpkg.cmake .. - ls -l build: diff --git a/cmake/sift_config.h.in b/cmake/sift_config.h.in index 86095a55..7b4d19d5 100644 --- a/cmake/sift_config.h.in +++ b/cmake/sift_config.h.in @@ -14,5 +14,4 @@ #define POPSIFT_HAVE_SHFL_DOWN_SYNC() @PopSift_HAVE_SHFL_DOWN_SYNC@ #define POPSIFT_HAVE_NORMF() @PopSift_HAVE_NORMF@ #define POPSIFT_DISABLE_GRID_FILTER() @DISABLE_GRID_FILTER@ -#define POPSIFT_USE_NVTX() @PopSift_USE_NVTX@ diff --git a/cudaInstallAppveyor.cmd b/cudaInstallAppveyor.cmd index 9d43f7fa..c59938a4 100644 --- a/cudaInstallAppveyor.cmd +++ b/cudaInstallAppveyor.cmd @@ -4,14 +4,12 @@ echo Downloading CUDA toolkit 12 for Windows 10 appveyor DownloadFile https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvcc/windows-x86_64/cuda_nvcc-windows-x86_64-12.5.82-archive.zip -Filename cuda_nvcc.zip appveyor DownloadFile https://developer.download.nvidia.com/compute/cuda/redist/cuda_cudart/windows-x86_64/cuda_cudart-windows-x86_64-12.5.82-archive.zip -Filename cuda_cudart.zip -appveyor DownloadFile https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvtx/windows-x86_64/cuda_nvtx-windows-x86_64-12.5.82-archive.zip -Filename cuda_nvtx.zip appveyor DownloadFile https://developer.download.nvidia.com/compute/cuda/redist/visual_studio_integration/windows-x86_64/visual_studio_integration-windows-x86_64-12.5.82-archive.zip -Filename vs_integration.zip dir echo Unzipping CUDA toolkit 12 tar -xf cuda_nvcc.zip tar -xf cuda_cudart.zip -tar -xf cuda_nvtx.zip tar -xf vs_integration.zip dir @@ -22,7 +20,6 @@ mkdir "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.5\extras" echo Copying toolkit files to install dir(s) xcopy cuda_cudart-windows-x86_64-12.5.82-archive "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.5" /s /e /i /y xcopy cuda_nvcc-windows-x86_64-12.5.82-archive "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.5" /s /e /i /y -xcopy cuda_nvtx-windows-x86_64-12.5.82-archive "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.5" /s /e /i /y xcopy visual_studio_integration-windows-x86_64-12.5.82-archive "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.5\extras" /s /e /i /y diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index ff3b3681..bd4b5faa 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -45,12 +45,6 @@ target_link_libraries(popsift CUDA::cudart Threads::Threads) -if(PopSift_USE_NVTX_PROFILING) -target_link_libraries(popsift - PUBLIC - CUDA::nvtx3) -endif() - set_target_properties(popsift PROPERTIES VERSION ${PROJECT_VERSION}) set_target_properties(popsift PROPERTIES DEBUG_POSTFIX "d") set_target_properties(popsift PROPERTIES CUDA_SEPARABLE_COMPILATION ON) diff --git a/src/popsift/popsift.cu b/src/popsift/popsift.cu index 09575772..46ac8467 100755 --- a/src/popsift/popsift.cu +++ b/src/popsift/popsift.cu @@ -438,18 +438,12 @@ void SiftJob::setImg( popsift::ImageBase* img ) popsift::ImageBase* SiftJob::getImg() { -#if POPSIFT_IS_DEFINED(POPSIFT_USE_NVTX) - _nvtx_id = nvtxRangeStartA( "inserting image" ); -#endif return _img; } void SiftJob::setFeatures( popsift::FeaturesBase* f ) { _p.set_value( f ); -#if POPSIFT_IS_DEFINED(POPSIFT_USE_NVTX) - nvtxRangeEnd( _nvtx_id ); -#endif } popsift::FeaturesHost* SiftJob::get() diff --git a/src/popsift/popsift.h b/src/popsift/popsift.h index 5654cc76..4c555400 100755 --- a/src/popsift/popsift.h +++ b/src/popsift/popsift.h @@ -23,13 +23,6 @@ #include #include -#if POPSIFT_IS_DEFINED(POPSIFT_USE_NVTX) -#include -#else -#define nvtxRangeStartA(a) -#define nvtxRangeEnd(a) -#endif - /* user parameters */ namespace popsift { @@ -50,9 +43,6 @@ class SiftJob unsigned char* _imageData; popsift::ImageBase* _img; std::exception_ptr _err; -#if POPSIFT_IS_DEFINED(POPSIFT_USE_NVTX) - nvtxRangeId_t _nvtx_id; -#endif public: diff --git a/src/popsift/s_filtergrid.cu b/src/popsift/s_filtergrid.cu index bfe2e64e..ba973c97 100644 --- a/src/popsift/s_filtergrid.cu +++ b/src/popsift/s_filtergrid.cu @@ -9,13 +9,6 @@ #include "sift_extremum.h" #include "sift_pyramid.h" -#if POPSIFT_IS_DEFINED(POPSIFT_USE_NVTX) -#include -#else -#define nvtxRangePushA(a) -#define nvtxRangePop() -#endif - #if ! POPSIFT_IS_DEFINED(POPSIFT_DISABLE_GRID_FILTER) #include @@ -317,9 +310,7 @@ int Pyramid::extrema_filter_grid( const Config& conf, int ext_total ) } } - nvtxRangePushA( "writing back count" ); writeDescCountersToDevice( ); - nvtxRangePop( ); return ret_ext_total; } diff --git a/src/popsift/s_image.cu b/src/popsift/s_image.cu index a966dd39..6e0d217e 100755 --- a/src/popsift/s_image.cu +++ b/src/popsift/s_image.cu @@ -15,13 +15,6 @@ #include #include -#if POPSIFT_IS_DEFINED(POPSIFT_USE_NVTX) -#include -#else -#define nvtxRangePushA(a) -#define nvtxRangePop() -#endif - using namespace std; namespace popsift { @@ -98,8 +91,6 @@ void Image::resetDimensions( int w, int h ) destroyTexture( ); createTexture( ); } else { - nvtxRangePushA( "reallocating host-side image memory" ); - _max_w = max( w, _max_w ); _max_h = max( h, _max_h ); _input_image_h.freeHost( popsift::CudaAllocated ); @@ -111,21 +102,15 @@ void Image::resetDimensions( int w, int h ) destroyTexture( ); createTexture( ); - - nvtxRangePop(); // "reallocating host-side image memory" } } void Image::allocate( int w, int h ) { - nvtxRangePushA( "allocating host-side image memory" ); - _input_image_h.allocHost( w, h, popsift::CudaAllocated ); _input_image_d.allocDev( w, h ); createTexture( ); - - nvtxRangePop(); // "allocating host-side image memory" } void Image::destroyTexture( ) @@ -222,8 +207,6 @@ void ImageFloat::resetDimensions( int w, int h ) destroyTexture( ); createTexture( ); } else { - nvtxRangePushA( "reallocating host-side image memory" ); - _max_w = max( w, _max_w ); _max_h = max( h, _max_h ); _input_image_h.freeHost( popsift::CudaAllocated ); @@ -235,21 +218,15 @@ void ImageFloat::resetDimensions( int w, int h ) destroyTexture( ); createTexture( ); - - nvtxRangePop(); // "reallocating host-side image memory" } } void ImageFloat::allocate( int w, int h ) { - nvtxRangePushA( "allocating host-side image memory" ); - _input_image_h.allocHost( w, h, popsift::CudaAllocated ); _input_image_d.allocDev( w, h ); createTexture( ); - - nvtxRangePop(); // "allocating host-side image memory" } void ImageFloat::destroyTexture( ) diff --git a/src/popsift/s_orientation.cu b/src/popsift/s_orientation.cu index b34aaaa1..1f75229f 100644 --- a/src/popsift/s_orientation.cu +++ b/src/popsift/s_orientation.cu @@ -18,13 +18,6 @@ #include #include -#if POPSIFT_IS_DEFINED(POPSIFT_USE_NVTX) -#include -#else -#define nvtxRangePushA(a) -#define nvtxRangePop() -#endif - using namespace popsift; using namespace std; diff --git a/src/popsift/sift_desc.cu b/src/popsift/sift_desc.cu index f533df35..4632298a 100644 --- a/src/popsift/sift_desc.cu +++ b/src/popsift/sift_desc.cu @@ -21,13 +21,6 @@ #include #include -#if POPSIFT_IS_DEFINED(POPSIFT_USE_NVTX) -#include -#else -#define nvtxRangePushA(a) -#define nvtxRangePop() -#endif - using namespace popsift; using namespace std; @@ -55,11 +48,8 @@ using namespace std; __host__ void Pyramid::descriptors( const Config& conf ) { - nvtxRangePushA("Reading orientation count"); - readDescCountersFromDevice( _octaves[0].getStream() ); cudaStreamSynchronize( _octaves[0].getStream() ); - nvtxRangePop( ); for( int octave=_num_octaves-1; octave>=0; octave-- ) // for( int octave=0; octave<_num_octaves; octave++ ) diff --git a/src/popsift/sift_pyramid.cu b/src/popsift/sift_pyramid.cu index c03b0d61..340cffff 100644 --- a/src/popsift/sift_pyramid.cu +++ b/src/popsift/sift_pyramid.cu @@ -25,13 +25,6 @@ #define mkdir(path, perm) _mkdir(path) #endif -#if POPSIFT_IS_DEFINED(POPSIFT_USE_NVTX) -#include -#else -#define nvtxRangePushA(a) -#define nvtxRangePop() -#endif - #define PYRAMID_PRINT_DEBUG 0 using namespace std; @@ -285,12 +278,10 @@ FeaturesHost* Pyramid::get_descriptors( const Config& conf ) readDescCountersFromDevice(); - nvtxRangePushA( "download descriptors" ); FeaturesHost* features = new FeaturesHost( hct.ext_total, hct.ori_total ); if( hct.ext_total == 0 || hct.ori_total == 0 ) { - nvtxRangePop(); return features; } @@ -298,9 +289,7 @@ FeaturesHost* Pyramid::get_descriptors( const Config& conf ) prep_features<<>>( features->getDescriptors(), up_fac ); POP_SYNC_CHK; - nvtxRangePushA( "register host memory" ); features->pin( ); - nvtxRangePop(); popcuda_memcpy_async( features->getFeatures(), dobuf_shadow.features, hct.ext_total * sizeof(Feature), @@ -313,10 +302,7 @@ FeaturesHost* Pyramid::get_descriptors( const Config& conf ) cudaMemcpyDeviceToHost, _download_stream ); cudaStreamSynchronize( _download_stream ); - nvtxRangePushA( "unregister host memory" ); features->unpin( ); - nvtxRangePop(); - nvtxRangePop(); return features; }