diff --git a/src/c/Cuda/CHelpers.cpp b/src/c/Cuda/CHelpers.cpp index 52fdbf9..2ac67b1 100644 --- a/src/c/Cuda/CHelpers.cpp +++ b/src/c/Cuda/CHelpers.cpp @@ -13,9 +13,9 @@ float* createEllipsoidKernel(Vec radii, Vec& kernelDim std::memset(kernel,0,sizeof(float)*kernelDims.product()); Vec mid((kernelDims-1)/2); - Vec dimScale = Vec(1,1,1) / Vec(radii.pwr(2)); + Vec dimScale = Vec(1.0f) / Vec(radii.pwr(2)); - Vec cur(0,0,0); + Vec cur(0); for (cur.z=0; cur.z -#include +#ifdef _WIN32 + #define DLL_EXPORT_API __declspec(dllexport) + #define DLL_IMPORT_API __declspec(dllimport) +#else + #define DLL_EXPORT_API __attribute__((visibility("default"))) + #define DLL_IMPORT_API +#endif #ifdef IMAGE_PROCESSOR_DLL -#ifdef IMAGE_PROCESSOR_INTERNAL -#define IMAGE_PROCESSOR_API __declspec(dllexport) -#else -#define IMAGE_PROCESSOR_API __declspec(dllimport) -#endif // IMAGE_PROCESSOR_INTERNAL + #ifdef IMAGE_PROCESSOR_EXPORT + #define IMAGE_PROCESSOR_API DLL_EXPORT_API + #else + #define IMAGE_PROCESSOR_API DLL_IMPORT_API + #endif // IMAGE_PROCESSOR_EXPORT #else -#define IMAGE_PROCESSOR_API + #define IMAGE_PROCESSOR_API #endif // IMAGE_PROCESSOR_DLL +#include "Vec.h" +#include +#include + IMAGE_PROCESSOR_API float* createEllipsoidKernel(Vec radii, Vec& kernelDims); IMAGE_PROCESSOR_API int calcOtsuThreshold(const double* normHistogram, int numBins); diff --git a/src/c/Cuda/CMakeLists.txt b/src/c/Cuda/CMakeLists.txt index d4cb9b4..83884ee 100644 --- a/src/c/Cuda/CMakeLists.txt +++ b/src/c/Cuda/CMakeLists.txt @@ -1,9 +1,8 @@ # Setup backend Hydra CUDA library (static) for cuda building -add_library(HydraCudaLib STATIC "") +add_library(HydraCudaStatic STATIC "") -target_compile_features(HydraCudaLib PRIVATE cxx_std_11) -target_link_libraries(HydraCudaLib PRIVATE OpenMP::OpenMP_CXX) -set_property(TARGET HydraCudaLib PROPERTY CUDA_STANDARD 11) +target_compile_features(HydraCudaStatic PRIVATE cxx_std_11) +target_link_libraries(HydraCudaStatic PRIVATE OpenMP::OpenMP_CXX) # Set a variable to 
turn on/off PROCESS_MUTEX support set(USE_PROCESS_MUTEX ON CACHE BOOL "Use process-level mutex to guard GPU calls") @@ -18,15 +17,16 @@ set(USE_PROCESS_MUTEX ON CACHE BOOL "Use process-level mutex to guard GPU calls" # string(APPEND CMAKE_CUDA_FLAGS " -gencode=arch=compute_61,code=sm_61") # string(APPEND CMAKE_CUDA_FLAGS " -gencode=arch=compute_70,code=compute_70") -set_target_properties(HydraCudaLib PROPERTIES CUDA_ARCHITECTURES "52;60;61;70") - -set_target_properties(HydraCudaLib +set_target_properties(HydraCudaStatic PROPERTIES POSITION_INDEPENDENT_CODE ON + CUDA_STANDARD 11 CUDA_SEPARABLE_COMPILATION ON - CUDA_RESOLVE_DEVICE_SYMBOLS ON) + CUDA_RESOLVE_DEVICE_SYMBOLS ON + CUDA_ARCHITECTURES "52;60;61;70" +) -target_sources(HydraCudaLib +target_sources(HydraCudaStatic PUBLIC BufferConversions.h CHelpers.h @@ -89,7 +89,101 @@ target_sources(HydraCudaLib KernelIterator.cu ) -target_include_directories(HydraCudaLib +target_include_directories(HydraCudaStatic + PUBLIC + $ + $ + $ + PRIVATE + ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES} +) + +# Backend shared lib for dynamic c++ linking +add_library(HydraCuda SHARED "") + +target_compile_features(HydraCuda PRIVATE cxx_std_11) +target_link_libraries(HydraCuda + PRIVATE + OpenMP::OpenMP_CXX +) + +# Set a variable to turn on/off PROCESS_MUTEX support +# set(USE_PROCESS_MUTEX ON CACHE BOOL "Use process-level mutex to guard GPU calls") +set_target_properties(HydraCuda + PROPERTIES + CUDA_STANDARD 11 + CUDA_ARCHITECTURES "52;60;61;70" + CUDA_SEPARABLE_COMPILATION ON + CUDA_RESOLVE_DEVICE_SYMBOLS ON + CXX_VISIBILITY_PRESET hidden + POSITION_INDEPENDENT_CODE ON + LIBRARY_OUTPUT_DIRECTORY $ + LIBRARY_OUTPUT_DIRECTORY $ +) + +target_sources(HydraCuda + PUBLIC + BufferConversions.h + CHelpers.h + CommandLineProgress.h + CudaDeviceInfo.h + CudaDeviceStats.h + CudaUtilities.h + CWrapperAutogen.h + CWrappers.h + Defines.h + ImageChunk.h + ImageView.h + KernelGenerators.h + Vec.h + PRIVATE + CudaAddTwoImages.cuh + CudaClosure.cuh + 
CudaDeviceCount.cuh + CudaDeviceImages.cuh + CudaElementWiseDifference.cuh + CudaEntropyFilter.cuh + CudaGaussian.cuh + CudaGetMinMax.cuh + CudaHighPassFilter.cuh + CudaIdentityFilter.cuh + CudaImageContainer.cuh + CudaImageContainerClean.cuh + CudaLoG.cuh + CudaMaxFilter.cuh + CudaMeanAndVariance.cuh + CudaMeanFilter.cuh + CudaMedianFilter.cuh + CudaMemoryStats.cuh + CudaMinFilter.cuh + CudaMinMax.cuh + CudaMultiplySum.cuh + CudaNLMeans.cuh + CudaOpener.cuh + CudaStdFilter.cuh + CudaSum.cuh + CudaVarFilter.cuh + CudaWienerFilter.cuh + ImageDimensions.cuh + Kernel.cuh + KernelIterator.cuh + SeparableMultiplySum.cuh + + CHelpers.cpp + CudaDeviceInfo.cpp + CudaDeviceStats.cpp + CudaUtilities.cpp + GaussianKernel.cpp + ImageChunk.cpp + LoGKernel.cpp + + CWrapperAutogen.cu + CWrappers.cu + Kernel.cu + KernelIterator.cu +) + +target_include_directories(HydraCuda PUBLIC $ $ diff --git a/src/c/Cuda/CWrappers.h b/src/c/Cuda/CWrappers.h index e85910d..42dd374 100644 --- a/src/c/Cuda/CWrappers.h +++ b/src/c/Cuda/CWrappers.h @@ -1,40 +1,48 @@ -#pragma once -#include "Vec.h" -#include "CudaDeviceStats.h" -#include "ImageView.h" - -#include -#include - - -#ifdef IMAGE_PROCESSOR_DLL -#ifdef IMAGE_PROCESSOR_INTERNAL -#define IMAGE_PROCESSOR_API __declspec(dllexport) -#else -#define IMAGE_PROCESSOR_API __declspec(dllimport) -#endif // IMAGE_PROCESSOR_INTERNAL -#else -#define IMAGE_PROCESSOR_API -#endif // IMAGE_PROCESSOR_DLL - - -#include "CWrapperAutogen.h" - - -IMAGE_PROCESSOR_API void clearDevice(); - -IMAGE_PROCESSOR_API int deviceCount(); -IMAGE_PROCESSOR_API int deviceStats(DevStats** stats); -IMAGE_PROCESSOR_API int memoryStats(std::size_t** stats); - - -/// Example wrapper header calls -//IMAGE_PROCESSOR_API void fooFilter(const ImageView imageIn, ImageView imageOut, ImageView kernel, int numIterations = 1, int device = -1); -//IMAGE_PROCESSOR_API void fooFilter(const ImageView imageIn, ImageView imageOut, ImageView kernel, int numIterations = 1, int device = -1); 
-//IMAGE_PROCESSOR_API void fooFilter(const ImageView imageIn, ImageView imageOut, ImageView kernel, int numIterations = 1, int device = -1); -//IMAGE_PROCESSOR_API void fooFilter(const ImageView imageIn, ImageView imageOut, ImageView kernel, int numIterations = 1, int device = -1); -//IMAGE_PROCESSOR_API void fooFilter(const ImageView imageIn, ImageView imageOut, ImageView kernel, int numIterations = 1, int device = -1); -//IMAGE_PROCESSOR_API void fooFilter(const ImageView imageIn, ImageView imageOut, ImageView kernel, int numIterations = 1, int device = -1); -//IMAGE_PROCESSOR_API void fooFilter(const ImageView imageIn, ImageView imageOut, ImageView kernel, int numIterations = 1, int device = -1); -//IMAGE_PROCESSOR_API void fooFilter(const ImageView imageIn, ImageView imageOut, ImageView kernel, int numIterations = 1, int device = -1); -//IMAGE_PROCESSOR_API void fooFilter(const ImageView imageIn, ImageView imageOut, ImageView kernel, int numIterations = 1, int device = -1); +#pragma once + +#ifdef _WIN32 + #define DLL_EXPORT_API __declspec(dllexport) + #define DLL_IMPORT_API __declspec(dllimport) +#else + #define DLL_EXPORT_API __attribute__((visibility("default"))) + #define DLL_IMPORT_API +#endif + +#ifdef IMAGE_PROCESSOR_DLL + #ifdef IMAGE_PROCESSOR_EXPORT + #define IMAGE_PROCESSOR_API DLL_EXPORT_API + #else + #define IMAGE_PROCESSOR_API DLL_IMPORT_API + #endif // IMAGE_PROCESSOR_EXPORT +#else + #define IMAGE_PROCESSOR_API +#endif // IMAGE_PROCESSOR_DLL + + +#include +#include + +#include "Vec.h" +#include "ImageView.h" +#include "CudaDeviceStats.h" + +#include "CWrapperAutogen.h" + + +IMAGE_PROCESSOR_API void clearDevice(); + +IMAGE_PROCESSOR_API int deviceCount(); +IMAGE_PROCESSOR_API int deviceStats(DevStats** stats); +IMAGE_PROCESSOR_API int memoryStats(std::size_t** stats); + + +/// Example wrapper header calls +//IMAGE_PROCESSOR_API void fooFilter(const ImageView imageIn, ImageView imageOut, ImageView kernel, int numIterations = 1, int device = -1); 
+//IMAGE_PROCESSOR_API void fooFilter(const ImageView imageIn, ImageView imageOut, ImageView kernel, int numIterations = 1, int device = -1); +//IMAGE_PROCESSOR_API void fooFilter(const ImageView imageIn, ImageView imageOut, ImageView kernel, int numIterations = 1, int device = -1); +//IMAGE_PROCESSOR_API void fooFilter(const ImageView imageIn, ImageView imageOut, ImageView kernel, int numIterations = 1, int device = -1); +//IMAGE_PROCESSOR_API void fooFilter(const ImageView imageIn, ImageView imageOut, ImageView kernel, int numIterations = 1, int device = -1); +//IMAGE_PROCESSOR_API void fooFilter(const ImageView imageIn, ImageView imageOut, ImageView kernel, int numIterations = 1, int device = -1); +//IMAGE_PROCESSOR_API void fooFilter(const ImageView imageIn, ImageView imageOut, ImageView kernel, int numIterations = 1, int device = -1); +//IMAGE_PROCESSOR_API void fooFilter(const ImageView imageIn, ImageView imageOut, ImageView kernel, int numIterations = 1, int device = -1); +//IMAGE_PROCESSOR_API void fooFilter(const ImageView imageIn, ImageView imageOut, ImageView kernel, int numIterations = 1, int device = -1); diff --git a/src/c/Mex/CMakeLists.txt b/src/c/Mex/CMakeLists.txt index c655c79..2b0af1c 100644 --- a/src/c/Mex/CMakeLists.txt +++ b/src/c/Mex/CMakeLists.txt @@ -14,7 +14,7 @@ if ( USE_PROCESS_MUTEX ) endif() # Link against Matlab libraries -target_link_libraries(HydraMex PRIVATE HydraCudaLib ${Matlab_MX_LIBRARY} ${Matlab_MEX_LIBRARY}) +target_link_libraries(HydraMex PRIVATE HydraCudaStatic ${Matlab_MX_LIBRARY} ${Matlab_MEX_LIBRARY}) # Change output library name to .mex set_target_properties(HydraMex diff --git a/src/c/Python/CMakeLists.txt b/src/c/Python/CMakeLists.txt index 350f6bc..8c86a71 100644 --- a/src/c/Python/CMakeLists.txt +++ b/src/c/Python/CMakeLists.txt @@ -11,7 +11,7 @@ if ( USE_PROCESS_MUTEX ) endif() # Link against Python and NumPy libraries -target_link_libraries(HydraPy PRIVATE HydraCudaLib Python::Python Python::NumPy) 
+target_link_libraries(HydraPy PRIVATE HydraCudaStatic Python::Python Python::NumPy) # Change output library name to Hydra. set_target_properties(HydraPy