diff --git a/Common/DCAFitter/GPU/DeviceInterface/GPUInterface.h b/Common/DCAFitter/GPU/DeviceInterface/GPUInterface.h index d2d9bce45804b..8474a68d757b8 100644 --- a/Common/DCAFitter/GPU/DeviceInterface/GPUInterface.h +++ b/Common/DCAFitter/GPU/DeviceInterface/GPUInterface.h @@ -17,7 +17,6 @@ #include #include -#include namespace o2 { @@ -28,12 +27,12 @@ namespace device #if !defined(__HIPCC__) && !defined(__CUDACC__) typedef struct _dummyStream { -} stream; +} Stream; #else #ifdef __HIPCC__ -typedef hipStream_t stream; +typedef hipStream_t Stream; #else -typedef cudaStream_t stream; +typedef cudaStream_t Stream; #endif #endif @@ -46,43 +45,24 @@ class GPUInterface static GPUInterface* Instance(); // APIs - void register(void*, size_t); - void allocAsync(void**, size_t, unsigned short streamId = -1); + void registerBuffer(void*, size_t); + void unregisterBuffer(void* addr); + void allocDevice(void**, size_t); + void freeDevice(void*); + Stream& getStream(short N = 0); protected: - GPUInterface(size_t N) - { - resize(N); - } + GPUInterface(size_t N = 1); + ~GPUInterface(); void resize(size_t); unsigned short getNextCursor(); - static GPUInterface* sGPUInterface = nullptr; - std::atomic mCursor{0}; + static GPUInterface* sGPUInterface; std::vector mPool{}; - std::vector mStreams{}; + std::vector mStreams{}; }; -inline void GPUInterface::resize(size_t N) -{ - mPool.resize(N); - mStreams.resize(N); -} - -inline unsigned short GPUInterface::getNextCursor() -{ - auto index = mCursor++; - - auto id = index % mPool.size(); - - auto oldValue = mCursor; - auto newValue = oldValue % mPool.size(); - while (!mCursor.compare_exchange_weak(oldValue, newValue, std::memory_order_relaxed)) { - newValue = oldValue % mPool.size(); - } - return id; -} } // namespace device } // namespace vertexing } // namespace o2 diff --git a/Common/DCAFitter/GPU/DeviceInterface/GPUInterfaceConfigParam.h b/Common/DCAFitter/GPU/DeviceInterface/GPUInterfaceConfigParam.h deleted file mode 100644 index 38a650b19e7fc..0000000000000 --- a/Common/DCAFitter/GPU/DeviceInterface/GPUInterfaceConfigParam.h +++ /dev/null @@ -1,24 +0,0 @@ -// Copyright 2019-2020 CERN and copyright holders of ALICE O2. -// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. -// All rights not expressly granted are reserved. -// -// This software is distributed under the terms of the GNU General Public -// License v3 (GPL Version 3), copied verbatim in the file "COPYING". -// -// In applying this license CERN does not waive the privileges and immunities -// granted to it by virtue of its status as an Intergovernmental Organization -// or submit itself to any jurisdiction. - -#ifndef GPUMANAGER_INTERFACE_PARAMS -#define GPUMANAGER_INTERFACE_PARAMS - -#include "CommonUtils/ConfigurableParam.h" -#include "CommonUtils/ConfigurableParamHelper.h" -namespace o2::vertexing::device -{ -struct GPUInterfaceParamConfig : public o2::conf::ConfigurableParamHelper { - size_t streamPoolSize = 8; - O2ParamDef(GPUInterfaceParamConfig, "GPUInterfaceParams") -}; -} // namespace o2::vertexing::device -#endif \ No newline at end of file diff --git a/Common/DCAFitter/GPU/cuda/CMakeLists.txt b/Common/DCAFitter/GPU/cuda/CMakeLists.txt index 36481c9500eb0..42c2b8f0d5059 100644 --- a/Common/DCAFitter/GPU/cuda/CMakeLists.txt +++ b/Common/DCAFitter/GPU/cuda/CMakeLists.txt @@ -20,10 +20,7 @@ o2_add_library(DCAFitterCUDA O2::DetectorsBase PRIVATE_LINK_LIBRARIES O2::GPUTrackingCUDAExternalProvider) set_property(TARGET ${targetName} PROPERTY CUDA_SEPARABLE_COMPILATION ON) - -o2_target_root_dictionary(DCAFitterCUDA - HEADERS ../DeviceInterface/GPUInterfaceConfigParam.h - LINKDEF DCAFitterGPULinkDef.h) +add_compile_options(-lineinfo) o2_add_test(DCAFitterNCUDA SOURCES test/testDCAFitterNGPU.cxx diff --git a/Common/DCAFitter/GPU/cuda/DCAFitterGPULinkDef.h b/Common/DCAFitter/GPU/cuda/DCAFitterGPULinkDef.h deleted file mode 100644 index 6267dd701c5ab..0000000000000 --- a/Common/DCAFitter/GPU/cuda/DCAFitterGPULinkDef.h +++ /dev/null @@ -1,21 +0,0 @@ -// Copyright 2019-2020 CERN and copyright holders of ALICE O2. -// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. -// All rights not expressly granted are reserved. -// -// This software is distributed under the terms of the GNU General Public -// License v3 (GPL Version 3), copied verbatim in the file "COPYING". -// -// In applying this license CERN does not waive the privileges and immunities -// granted to it by virtue of its status as an Intergovernmental Organization -// or submit itself to any jurisdiction. - -#ifdef __CLING__ - -#pragma link off all globals; -#pragma link off all classes; -#pragma link off all functions; - -#pragma link C++ class o2::vertexing::device::GPUInterfaceParamConfig + ; -#pragma link C++ class o2::conf::ConfigurableParamHelper < o2::vertexing::device::GPUInterfaceParamConfig> + ; - -#endif \ No newline at end of file diff --git a/Common/DCAFitter/GPU/cuda/DCAFitterN.cu b/Common/DCAFitter/GPU/cuda/DCAFitterN.cu index 456069f3fb540..5a0b79007b202 100644 --- a/Common/DCAFitter/GPU/cuda/DCAFitterN.cu +++ b/Common/DCAFitter/GPU/cuda/DCAFitterN.cu @@ -150,14 +150,16 @@ std::vector processBulk(const int nBlocks, int iArg{0}; ([&] { - gpuInterface->register(args.data(), sizeof(Tr) * args.size()); - gpuInterface->allocAsync(reinterpret_cast(&(tracks_device[iArg])), sizeof(Tr) * args.size()); - gpuCheckError(cudaMemcpy(tracks_device[iArg], args.data(), sizeof(Tr) * args.size(), cudaMemcpyHostToDevice)); + gpuInterface->registerBuffer(reinterpret_cast(args.data()), sizeof(Tr) * args.size()); + gpuInterface->allocDevice(reinterpret_cast(&(tracks_device[iArg])), sizeof(Tr) * args.size()); + gpuCheckError(cudaMemcpyAsync(tracks_device[iArg], args.data(), sizeof(Tr) * args.size(), cudaMemcpyHostToDevice, gpuInterface->getStream(iArg))); ++iArg; }(), ...); - gpuCheckError(cudaMalloc(reinterpret_cast(&results_device), sizeof(int) * nFits)); - gpuCheckError(cudaMalloc(reinterpret_cast(&fitters_device), sizeof(Fitter) * nFits)); + gpuInterface->registerBuffer(reinterpret_cast(fitters.data()), sizeof(Fitter) * nFits); + gpuInterface->registerBuffer(reinterpret_cast(results.data()), sizeof(int) * nFits); + gpuInterface->allocDevice(reinterpret_cast(&results_device), sizeof(int) * nFits); + gpuInterface->allocDevice(reinterpret_cast(&fitters_device), sizeof(Fitter) * nFits); gpuCheckError(cudaMemcpy(fitters_device, fitters.data(), sizeof(Fitter) * nFits, cudaMemcpyHostToDevice)); gpuCheckError(cudaEventRecord(start)); @@ -172,14 +174,17 @@ std::vector processBulk(const int nBlocks, iArg = 0; ([&] { - gpuCheckError(cudaMemcpy(args.data(), tracks_device[iArg], sizeof(Tr) * args.size(), cudaMemcpyDeviceToHost)); - gpuCheckError(cudaFree(tracks_device[iArg])); + gpuCheckError(cudaMemcpyAsync(args.data(), tracks_device[iArg], sizeof(Tr) * args.size(), cudaMemcpyDeviceToHost, gpuInterface->getStream(iArg))); + gpuInterface->freeDevice(tracks_device[iArg]); + gpuInterface->unregisterBuffer(args.data()); ++iArg; }(), ...); - gpuCheckError(cudaFree(fitters_device)); - gpuCheckError(cudaFree(results_device)); + gpuInterface->freeDevice(fitters_device); + gpuInterface->freeDevice(results_device); + gpuInterface->unregisterBuffer(fitters.data()); + gpuInterface->unregisterBuffer(results.data()); gpuCheckError(cudaEventSynchronize(stop)); float milliseconds = 0; diff --git a/Common/DCAFitter/GPU/cuda/GPUInterface.cu b/Common/DCAFitter/GPU/cuda/GPUInterface.cu index 8e301edbbbd9a..5054d5bbb6d87 100644 --- a/Common/DCAFitter/GPU/cuda/GPUInterface.cu +++ b/Common/DCAFitter/GPU/cuda/GPUInterface.cu @@ -18,7 +18,6 @@ #include #include "DeviceInterface/GPUInterface.h" -#include "DeviceInterface/GPUInterfaceConfigParam.h" #define gpuCheckError(x) \ { \ @@ -36,21 +35,59 @@ inline void gpuAssert(cudaError_t code, const char* file, int line, bool abort = namespace o2::vertexing::device { + +GPUInterface::GPUInterface(size_t N) +{ + resize(N); + for (auto& st : mStreams) { + gpuCheckError(cudaStreamCreate(&st)); + } +} + +GPUInterface::~GPUInterface() +{ + for (auto& st : mStreams) { + gpuCheckError(cudaStreamDestroy(st)); + } +} + +void GPUInterface::resize(size_t N) +{ + mPool.resize(N); + mStreams.resize(N); +} + +void GPUInterface::registerBuffer(void* addr, size_t bufferSize) +{ + gpuCheckError(cudaHostRegister(addr, bufferSize, cudaHostRegisterDefault)); +} + +void GPUInterface::unregisterBuffer(void* addr) +{ + gpuCheckError(cudaHostUnregister(addr)); +} + GPUInterface* GPUInterface::sGPUInterface = nullptr; GPUInterface* GPUInterface::Instance() { if (sGPUInterface == nullptr) { - sGPUInterface = new GPUInterface(o2::vertexing::device::GPUInterfaceParamConfig::Instance().streamPoolSize); + sGPUInterface = new GPUInterface(8); // FIXME: get some configurable param to do so. } return sGPUInterface; } -void GPUInterface::register(void* addr, size_t bufferSize) +void GPUInterface::allocDevice(void** addrPtr, size_t bufferSize) { - gpuCheckError(cudaHostRegister(addr, bufferSize, cudaHostRegisterDefault)); + gpuCheckError(cudaMalloc(addrPtr, bufferSize)); } -void GPUInterface::allocAsync(void** addrPtr, size_t bufferSize, unsigned short streamId) { +void GPUInterface::freeDevice(void* addr) +{ + gpuCheckError(cudaFree(addr)); +} +Stream& GPUInterface::getStream(short N) +{ + return mStreams[N % mStreams.size()]; } } // namespace o2::vertexing::device \ No newline at end of file diff --git a/Common/DCAFitter/GPU/cuda/GPUInterfaceConfigParam.cxx b/Common/DCAFitter/GPU/cuda/GPUInterfaceConfigParam.cxx deleted file mode 100644 index e72873a36156c..0000000000000 --- a/Common/DCAFitter/GPU/cuda/GPUInterfaceConfigParam.cxx +++ /dev/null @@ -1,19 +0,0 @@ -// Copyright 2019-2020 CERN and copyright holders of ALICE O2. -// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. -// All rights not expressly granted are reserved. -// -// This software is distributed under the terms of the GNU General Public -// License v3 (GPL Version 3), copied verbatim in the file "COPYING". -// -// In applying this license CERN does not waive the privileges and immunities -// granted to it by virtue of its status as an Intergovernmental Organization -// or submit itself to any jurisdiction. - -#include "DeviceInterface/GPUInterfaceConfigParam.h" - -namespace o2::vertexing::device -{ -static auto& sGPUInterfaceParam = o2::vertexing::device::GPUInterfaceParamConfig::Instance(); - -O2ParamImpl(os::vertexing::device::GPUInterfaceParamConfig); -} // namespace o2::vertexing::device \ No newline at end of file diff --git a/Common/DCAFitter/GPU/cuda/test/testDCAFitterNGPU.cxx b/Common/DCAFitter/GPU/cuda/test/testDCAFitterNGPU.cxx index ff4f12827f1e8..14ed8004d3126 100644 --- a/Common/DCAFitter/GPU/cuda/test/testDCAFitterNGPU.cxx +++ b/Common/DCAFitter/GPU/cuda/test/testDCAFitterNGPU.cxx @@ -23,9 +23,9 @@ #include #include -#define nBlocks 60 -#define nThreads 1024 -#define NTest 1000000 +#define nBlocks 30 +#define nThreads 256 +#define NTest 100000 namespace o2 { diff --git a/Common/DCAFitter/GPU/hip/CMakeLists.txt b/Common/DCAFitter/GPU/hip/CMakeLists.txt index f62759bb6ea2c..a804775d051c7 100644 --- a/Common/DCAFitter/GPU/hip/CMakeLists.txt +++ b/Common/DCAFitter/GPU/hip/CMakeLists.txt @@ -20,7 +20,7 @@ o2_add_hipified_library(DCAFitterHIP hip::host PRIVATE_LINK_LIBRARIES O2::GPUTrackingHIPExternalProvider TARGETVARNAME targetNAme) - + o2_add_test(DCAFitterNHIP SOURCES ../cuda/test/testDCAFitterNGPU.cxx PUBLIC_LINK_LIBRARIES O2::ReconstructionDataFormats