diff --git a/Common/DCAFitter/GPU/cuda/CMakeLists.txt b/Common/DCAFitter/GPU/cuda/CMakeLists.txt index 149cef4a8a843..a498d0c350202 100644 --- a/Common/DCAFitter/GPU/cuda/CMakeLists.txt +++ b/Common/DCAFitter/GPU/cuda/CMakeLists.txt @@ -8,7 +8,7 @@ # In applying this license CERN does not waive the privileges and immunities # granted to it by virtue of its status as an Intergovernmental Organization # or submit itself to any jurisdiction. -add_compile_definitions(GPUCA_GPU_DEBUG_PRINT) + o2_add_library(DCAFitterCUDA TARGETVARNAME targetName SOURCES DCAFitterN.cu diff --git a/Common/DCAFitter/GPU/cuda/DCAFitterN.cu b/Common/DCAFitter/GPU/cuda/DCAFitterN.cu index 31fc34614e2ec..e3896bf6de89c 100644 --- a/Common/DCAFitter/GPU/cuda/DCAFitterN.cu +++ b/Common/DCAFitter/GPU/cuda/DCAFitterN.cu @@ -36,113 +36,84 @@ namespace o2::vertexing::device { namespace kernel { -GPUg() void printKernel(o2::vertexing::DCAFitterN<2>* ft) +template +GPUg() void printKernel(o2::vertexing::DCAFitterN* ft) { if (threadIdx.x == 0) { - printf(" =============== GPU DCA Fitter ================\n"); + printf(" =============== GPU DCA Fitter %d prongs ================\n", N); ft->print(); - printf(" ===============================================\n"); + printf(" =========================================================\n\n"); } } -GPUg() void processKernel(o2::vertexing::DCAFitterN<2>* ft, o2::track::TrackParCov* t1, o2::track::TrackParCov* t2, int* res) +template +GPUg() void processKernel(Fitter* ft, int* res, Tr*... tracks) { - *res = ft->process(*t1, *t2); + *res = ft->process(*tracks...); } } // namespace kernel -void print(o2::vertexing::DCAFitterN<2>& ft, - const int nBlocks, - const int nThreads) +/// CPU handlers +template +void print(const int nBlocks, + const int nThreads, + Fitter& ft) { - DCAFitterN<2>* ft_device; - gpuCheckError(cudaMalloc(reinterpret_cast(&ft_device), sizeof(o2::vertexing::DCAFitterN<2>))); - gpuCheckError(cudaMemcpy(ft_device, &ft, sizeof(o2::vertexing::DCAFitterN<2>), cudaMemcpyHostToDevice)); + Fitter* ft_device; + gpuCheckError(cudaMalloc(reinterpret_cast(&ft_device), sizeof(Fitter))); + gpuCheckError(cudaMemcpy(ft_device, &ft, sizeof(Fitter), cudaMemcpyHostToDevice)); - kernel::printKernel<<>>(ft_device); + kernel::printKernel<<>>(ft_device); gpuCheckError(cudaPeekAtLastError()); gpuCheckError(cudaDeviceSynchronize()); } -int process(o2::vertexing::DCAFitterN<2>& fitter, - o2::track::TrackParCov& track1, - o2::track::TrackParCov& track2, - const int nBlocks, - const int nThreads) -{ - DCAFitterN<2>* ft_device; - o2::track::TrackParCov* t1_device; - o2::track::TrackParCov* t2_device; - int result, *result_device; - - gpuCheckError(cudaMalloc(reinterpret_cast(&ft_device), sizeof(o2::vertexing::DCAFitterN<2>))); - gpuCheckError(cudaMalloc(reinterpret_cast(&t1_device), sizeof(o2::track::TrackParCov))); - gpuCheckError(cudaMalloc(reinterpret_cast(&t2_device), sizeof(o2::track::TrackParCov))); - gpuCheckError(cudaMalloc(reinterpret_cast(&result_device), sizeof(int))); - - gpuCheckError(cudaMemcpy(ft_device, &fitter, sizeof(o2::vertexing::DCAFitterN<2>), cudaMemcpyHostToDevice)); - gpuCheckError(cudaMemcpy(t1_device, &track1, sizeof(o2::track::TrackParCov), cudaMemcpyHostToDevice)); - gpuCheckError(cudaMemcpy(t2_device, &track2, sizeof(o2::track::TrackParCov), cudaMemcpyHostToDevice)); - - kernel::processKernel<<>>(ft_device, t1_device, t2_device, result_device); - - gpuCheckError(cudaPeekAtLastError()); - gpuCheckError(cudaDeviceSynchronize()); - - gpuCheckError(cudaMemcpy(&result, result_device, sizeof(int), cudaMemcpyDeviceToHost)); - gpuCheckError(cudaMemcpy(&fitter, ft_device, sizeof(o2::vertexing::DCAFitterN<2>), cudaMemcpyDeviceToHost)); - gpuCheckError(cudaMemcpy(&track1, t1_device, sizeof(o2::track::TrackParCov), cudaMemcpyDeviceToHost)); - gpuCheckError(cudaMemcpy(&track2, t2_device, sizeof(o2::track::TrackParCov), cudaMemcpyDeviceToHost)); - gpuCheckError(cudaFree(ft_device)); - gpuCheckError(cudaFree(t1_device)); - gpuCheckError(cudaFree(t2_device)); - - gpuCheckError(cudaFree(result_device)); - - return result; -} - -template -int process(o2::vertexing::DCAFitterN<2>&, - const int nBlocks = 1, - const int nThreads = 1, +template +int process(const int nBlocks, + const int nThreads, + Fitter& fitter, Tr&... args) { - DCAFitterN* ft_device; - std::array tracks_device; - // o2::track::TrackParCov* t1_device; - // o2::track::TrackParCov* t2_device; + Fitter* ft_device; + std::array tracks_device; int result, *result_device; - gpuCheckError(cudaMalloc(reinterpret_cast(&ft_device), sizeof(o2::vertexing::DCAFitterN))); - // gpuCheckError(cudaMalloc(reinterpret_cast(&t1_device), sizeof(o2::track::TrackParCov))); - // gpuCheckError(cudaMalloc(reinterpret_cast(&t2_device), sizeof(o2::track::TrackParCov))); - for (int iT{0}; iT < N; ++iT) { - gpuCheckError(cudaMalloc(reinterpret_cast(&(tracks_device[iT])), sizeof(o2::track::TrackParCov))); - } + gpuCheckError(cudaMalloc(reinterpret_cast(&ft_device), sizeof(Fitter))); gpuCheckError(cudaMalloc(reinterpret_cast(&result_device), sizeof(int))); - gpuCheckError(cudaMemcpy(ft_device, &fitter, sizeof(o2::vertexing::DCAFitterN<2>), cudaMemcpyHostToDevice)); - gpuCheckError(cudaMemcpy(t1_device, &track1, sizeof(o2::track::TrackParCov), cudaMemcpyHostToDevice)); - gpuCheckError(cudaMemcpy(t2_device, &track2, sizeof(o2::track::TrackParCov), cudaMemcpyHostToDevice)); + int iArg{0}; + ([&] { + gpuCheckError(cudaMalloc(reinterpret_cast(&(tracks_device[iArg])), sizeof(o2::track::TrackParCov))); + gpuCheckError(cudaMemcpy(tracks_device[iArg], &args, sizeof(o2::track::TrackParCov), cudaMemcpyHostToDevice)); + ++iArg; + }(), + ...); + + gpuCheckError(cudaMemcpy(ft_device, &fitter, sizeof(Fitter), cudaMemcpyHostToDevice)); - kernel::processKernel<<>>(ft_device, t1_device, t2_device, result_device); + std::apply([&](auto&&... args) { kernel::processKernel<<>>(ft_device, result_device, args...); }, tracks_device); gpuCheckError(cudaPeekAtLastError()); gpuCheckError(cudaDeviceSynchronize()); gpuCheckError(cudaMemcpy(&result, result_device, sizeof(int), cudaMemcpyDeviceToHost)); - gpuCheckError(cudaMemcpy(&fitter, ft_device, sizeof(o2::vertexing::DCAFitterN<2>), cudaMemcpyDeviceToHost)); - gpuCheckError(cudaMemcpy(&track1, t1_device, sizeof(o2::track::TrackParCov), cudaMemcpyDeviceToHost)); - gpuCheckError(cudaMemcpy(&track2, t2_device, sizeof(o2::track::TrackParCov), cudaMemcpyDeviceToHost)); - gpuCheckError(cudaFree(ft_device)); - gpuCheckError(cudaFree(t1_device)); - gpuCheckError(cudaFree(t2_device)); + gpuCheckError(cudaMemcpy(&fitter, ft_device, sizeof(Fitter), cudaMemcpyDeviceToHost)); + iArg = 0; + ([&] { + gpuCheckError(cudaMemcpy(&args, tracks_device[iArg], sizeof(o2::track::TrackParCov), cudaMemcpyDeviceToHost)); + gpuCheckError(cudaFree(tracks_device[iArg])); + ++iArg; + }(), + ...); gpuCheckError(cudaFree(result_device)); return result; } +template int process(const int, const int, o2::vertexing::DCAFitterN<2>&, o2::track::TrackParCov&, o2::track::TrackParCov&); +template int process(const int, const int, o2::vertexing::DCAFitterN<3>&, o2::track::TrackParCov&, o2::track::TrackParCov&, o2::track::TrackParCov&); +template void print(const int, const int, o2::vertexing::DCAFitterN<2>&); +template void print(const int, const int, o2::vertexing::DCAFitterN<3>&); } // namespace o2::vertexing::device \ No newline at end of file diff --git a/Common/DCAFitter/GPU/cuda/test/testDCAFitterNGPU.cxx b/Common/DCAFitter/GPU/cuda/test/testDCAFitterNGPU.cxx index 57172079ccf09..2a2d42105d6b6 100644 --- a/Common/DCAFitter/GPU/cuda/test/testDCAFitterNGPU.cxx +++ b/Common/DCAFitter/GPU/cuda/test/testDCAFitterNGPU.cxx @@ -189,7 +189,7 @@ BOOST_AUTO_TEST_CASE(DCAFitterNProngs) ft.setUseAbsDCA(true); swA.Start(false); - int ncA = device::process(ft, vctracks[0], vctracks[1]); // HERE WE FIT THE VERTICES + int ncA = device::process(1, 1, ft, vctracks[0], vctracks[1]); // HERE WE FIT THE VERTICES swA.Stop(); LOG(debug) << "fit abs.dist " << iev << " NC: " << ncA << " Chi2: " << (ncA ? ft.getChi2AtPCACandidate(0) : -1); if (ncA) { @@ -201,7 +201,7 @@ BOOST_AUTO_TEST_CASE(DCAFitterNProngs) ft.setUseAbsDCA(true); ft.setWeightedFinalPCA(true); swAW.Start(false); - int ncAW = device::process(ft, vctracks[0], vctracks[1]); // HERE WE FIT THE VERTICES + int ncAW = device::process(1, 1, ft, vctracks[0], vctracks[1]); // HERE WE FIT THE VERTICES swAW.Stop(); LOG(debug) << "fit abs.dist with final weighted DCA " << iev << " NC: " << ncAW << " Chi2: " << (ncAW ? ft.getChi2AtPCACandidate(0) : -1); if (ncAW) { @@ -213,7 +213,7 @@ BOOST_AUTO_TEST_CASE(DCAFitterNProngs) ft.setUseAbsDCA(false); ft.setWeightedFinalPCA(false); swW.Start(false); - int ncW = device::process(ft, vctracks[0], vctracks[1]); // HERE WE FIT THE VERTICES + int ncW = device::process(1, 1, ft, vctracks[0], vctracks[1]); // HERE WE FIT THE VERTICES swW.Stop(); LOG(debug) << "fit wgh.dist " << iev << " NC: " << ncW << " Chi2: " << (ncW ? ft.getChi2AtPCACandidate(0) : -1); if (ncW) { @@ -222,8 +222,7 @@ BOOST_AUTO_TEST_CASE(DCAFitterNProngs) nfoundW++; } } - // ft.print(); - device::print(ft, 1, 1); + device::print(1, 1, ft); meanDA /= nfoundA ? nfoundA : 1; meanDAW /= nfoundA ? nfoundA : 1; meanDW /= nfoundW ? nfoundW : 1; @@ -268,7 +267,7 @@ BOOST_AUTO_TEST_CASE(DCAFitterNProngs) ft.setUseAbsDCA(true); swA.Start(false); - int ncA = device::process(ft, vctracks[0], vctracks[1]); + int ncA = device::process(1, 1, ft, vctracks[0], vctracks[1]); swA.Stop(); LOG(debug) << "fit abs.dist " << iev << " NC: " << ncA << " Chi2: " << (ncA ? ft.getChi2AtPCACandidate(0) : -1); if (ncA) { @@ -280,7 +279,7 @@ BOOST_AUTO_TEST_CASE(DCAFitterNProngs) ft.setUseAbsDCA(true); ft.setWeightedFinalPCA(true); swAW.Start(false); - int ncAW = device::process(ft, vctracks[0], vctracks[1]); // HERE WE FIT THE VERTICES + int ncAW = device::process(1, 1, ft, vctracks[0], vctracks[1]); // HERE WE FIT THE VERTICES swAW.Stop(); LOG(debug) << "fit abs.dist with final weighted DCA " << iev << " NC: " << ncAW << " Chi2: " << (ncAW ? ft.getChi2AtPCACandidate(0) : -1); if (ncAW) { @@ -292,7 +291,7 @@ BOOST_AUTO_TEST_CASE(DCAFitterNProngs) ft.setUseAbsDCA(false); ft.setWeightedFinalPCA(false); swW.Start(false); - int ncW = device::process(ft, vctracks[0], vctracks[1]); // HERE WE FIT THE VERTICES + int ncW = device::process(1, 1, ft, vctracks[0], vctracks[1]); // HERE WE FIT THE VERTICES swW.Stop(); LOG(debug) << "fit wgh.dist " << iev << " NC: " << ncW << " Chi2: " << (ncW ? ft.getChi2AtPCACandidate(0) : -1); if (ncW) { @@ -302,7 +301,7 @@ BOOST_AUTO_TEST_CASE(DCAFitterNProngs) } } - device::print(ft, 1, 1); + device::print(1, 1, ft); meanDA /= nfoundA ? nfoundA : 1; meanDAW /= nfoundA ? nfoundA : 1; meanDW /= nfoundW ? nfoundW : 1; @@ -347,7 +346,7 @@ BOOST_AUTO_TEST_CASE(DCAFitterNProngs) ft.setUseAbsDCA(true); swA.Start(false); - int ncA = device::process(ft, vctracks[0], vctracks[1]); + int ncA = device::process(1, 1, ft, vctracks[0], vctracks[1]); swA.Stop(); LOG(debug) << "fit abs.dist with final weighted DCA " << iev << " NC: " << ncA << " Chi2: " << (ncA ? ft.getChi2AtPCACandidate(0) : -1); if (ncA) { @@ -359,7 +358,7 @@ BOOST_AUTO_TEST_CASE(DCAFitterNProngs) ft.setUseAbsDCA(true); ft.setWeightedFinalPCA(true); swAW.Start(false); - int ncAW = device::process(ft, vctracks[0], vctracks[1]); // HERE WE FIT THE VERTICES + int ncAW = device::process(1, 1, ft, vctracks[0], vctracks[1]); // HERE WE FIT THE VERTICES swAW.Stop(); LOG(debug) << "fit abs.dist " << iev << " NC: " << ncAW << " Chi2: " << (ncAW ? ft.getChi2AtPCACandidate(0) : -1); if (ncAW) { @@ -371,7 +370,7 @@ BOOST_AUTO_TEST_CASE(DCAFitterNProngs) ft.setUseAbsDCA(false); ft.setWeightedFinalPCA(false); swW.Start(false); - int ncW = device::process(ft, vctracks[0], vctracks[1]); // HERE WE FIT THE VERTICES + int ncW = device::process(1, 1, ft, vctracks[0], vctracks[1]); // HERE WE FIT THE VERTICES swW.Stop(); LOG(debug) << "fit wgh.dist " << iev << " NC: " << ncW << " Chi2: " << (ncW ? ft.getChi2AtPCACandidate(0) : -1); if (ncW) { @@ -380,7 +379,7 @@ BOOST_AUTO_TEST_CASE(DCAFitterNProngs) nfoundW++; } } - device::print(ft, 1, 1); + device::print(1, 1, ft); meanDA /= nfoundA ? nfoundA : 1; meanDAW /= nfoundAW ? nfoundAW : 1; meanDW /= nfoundW ? nfoundW : 1; @@ -424,7 +423,7 @@ BOOST_AUTO_TEST_CASE(DCAFitterNProngs) ft.setUseAbsDCA(true); swA.Start(false); - int ncA = device::process(ft, vctracks[0], vctracks[1]); // HERE WE FIT THE VERTICES + int ncA = device::process(1, 1, ft, vctracks[0], vctracks[1]); // HERE WE FIT THE VERTICES swA.Stop(); LOG(debug) << "fit abs.dist " << iev << " NC: " << ncA << " Chi2: " << (ncA ? ft.getChi2AtPCACandidate(0) : -1); if (ncA) { @@ -436,7 +435,7 @@ BOOST_AUTO_TEST_CASE(DCAFitterNProngs) ft.setUseAbsDCA(true); ft.setWeightedFinalPCA(true); swAW.Start(false); - int ncAW = device::process(ft, vctracks[0], vctracks[1]); // HERE WE FIT THE VERTICES + int ncAW = device::process(1, 1, ft, vctracks[0], vctracks[1]); // HERE WE FIT THE VERTICES swAW.Stop(); LOG(debug) << "fit abs.dist " << iev << " NC: " << ncAW << " Chi2: " << (ncAW ? ft.getChi2AtPCACandidate(0) : -1); if (ncAW) { @@ -448,7 +447,7 @@ BOOST_AUTO_TEST_CASE(DCAFitterNProngs) ft.setUseAbsDCA(false); ft.setWeightedFinalPCA(false); swW.Start(false); - int ncW = device::process(ft, vctracks[0], vctracks[1]); // HERE WE FIT THE VERTICES + int ncW = device::process(1, 1, ft, vctracks[0], vctracks[1]); // HERE WE FIT THE VERTICES swW.Stop(); LOG(debug) << "fit wgh.dist " << iev << " NC: " << ncW << " Chi2: " << (ncW ? ft.getChi2AtPCACandidate(0) : -1); if (ncW) { @@ -457,17 +456,17 @@ BOOST_AUTO_TEST_CASE(DCAFitterNProngs) nfoundW++; } } - device::print(ft, 1, 1); + device::print(1, 1, ft); meanDA /= nfoundA ? nfoundA : 1; meanDAW /= nfoundAW ? nfoundAW : 1; meanDW /= nfoundW ? nfoundW : 1; LOG(info) << "Processed " << NTest << " 2-prong vertices: Line : Line"; LOG(info) << "2-prongs with abs.dist minization: eff= " << float(nfoundA) / NTest - << " mean.dist to truth: " << meanDA << " CPU time: " << swA.CpuTime(); + << " mean.dist to truth: " << meanDA << " CPU time: " << swA.CpuTime(); LOG(info) << "2-prongs with abs.dist but wghPCA: eff= " << float(nfoundAW) / NTest - << " mean.dist to truth: " << meanDAW << " CPU time: " << swAW.CpuTime(); + << " mean.dist to truth: " << meanDAW << " CPU time: " << swAW.CpuTime(); LOG(info) << "2-prongs with wgh.dist minization: eff= " << float(nfoundW) / NTest - << " mean.dist to truth: " << meanDW << " CPU time: " << swW.CpuTime(); + << " mean.dist to truth: " << meanDW << " CPU time: " << swW.CpuTime(); BOOST_CHECK(nfoundA > 0.99 * NTest); BOOST_CHECK(nfoundAW > 0.99 * NTest); BOOST_CHECK(nfoundW > 0.99 * NTest); @@ -476,81 +475,81 @@ BOOST_AUTO_TEST_CASE(DCAFitterNProngs) BOOST_CHECK(meanDW < 0.1); } - // // 3 prongs vertices - // { - // std::vector forceQ{1, 1, 1}; - - // o2::vertexing::DCAFitterN<3> ft; // 3 prong fitter - // ft.setBz(bz); - // ft.setPropagateToPCA(true); // After finding the vertex, propagate tracks to the DCA. This is default anyway - // ft.setMaxR(200); // do not consider V0 seeds with 2D circles crossing above this R. This is default anyway - // ft.setMaxDZIni(4); // do not consider V0 seeds with tracks Z-distance exceeding this. This is default anyway - // ft.setMinParamChange(1e-3); // stop iterations if max correction is below this value. This is default anyway - // ft.setMinRelChi2Change(0.9); // stop iterations if chi2 improves by less that this factor - - // std::string treeName3A = "pr3a", treeName3AW = "pr3aw", treeName3W = "pr3w"; - // TStopwatch swA, swAW, swW; - // int nfoundA = 0, nfoundAW = 0, nfoundW = 0; - // double meanDA = 0, meanDAW = 0, meanDW = 0; - // swA.Stop(); - // swAW.Stop(); - // swW.Stop(); - // for (int iev = 0; iev < NTest; iev++) { - // auto genParent = generate(vtxGen, vctracks, bz, genPHS, dch, dchdec, forceQ); - - // ft.setUseAbsDCA(true); - // swA.Start(false); - // int ncA = ft.process(vctracks[0], vctracks[1], vctracks[2]); // HERE WE FIT THE VERTICES - // swA.Stop(); - // LOG(debug) << "fit abs.dist " << iev << " NC: " << ncA << " Chi2: " << (ncA ? ft.getChi2AtPCACandidate(0) : -1); - // if (ncA) { - // auto minD = checkResults(outStream, treeName3A, ft, vtxGen, genParent, dchdec); - // meanDA += minD; - // nfoundA++; - // } - - // ft.setUseAbsDCA(true); - // ft.setWeightedFinalPCA(true); - // swAW.Start(false); - // int ncAW = ft.process(vctracks[0], vctracks[1], vctracks[2]); // HERE WE FIT THE VERTICES - // swAW.Stop(); - // LOG(debug) << "fit abs.dist " << iev << " NC: " << ncAW << " Chi2: " << (ncAW ? ft.getChi2AtPCACandidate(0) : -1); - // if (ncAW) { - // auto minD = checkResults(outStream, treeName3AW, ft, vtxGen, genParent, dchdec); - // meanDAW += minD; - // nfoundAW++; - // } - - // ft.setUseAbsDCA(false); - // ft.setWeightedFinalPCA(false); - // swW.Start(false); - // int ncW = ft.process(vctracks[0], vctracks[1], vctracks[2]); // HERE WE FIT THE VERTICES - // swW.Stop(); - // LOG(debug) << "fit wgh.dist " << iev << " NC: " << ncW << " Chi2: " << (ncW ? ft.getChi2AtPCACandidate(0) : -1); - // if (ncW) { - // auto minD = checkResults(outStream, treeName3W, ft, vtxGen, genParent, dchdec); - // meanDW += minD; - // nfoundW++; - // } - // } - // ft.print(); - // meanDA /= nfoundA ? nfoundA : 1; - // meanDAW /= nfoundAW ? nfoundAW : 1; - // meanDW /= nfoundW ? nfoundW : 1; - // LOG(debug) << "Processed " << NTest << " 3-prong vertices"; - // LOG(debug) << "3-prongs with abs.dist minization: eff= " << float(nfoundA) / NTest - // << " mean.dist to truth: " << meanDA << " CPU time: " << swA.CpuTime(); - // LOG(debug) << "3-prongs with abs.dist but wghPCA: eff= " << float(nfoundAW) / NTest - // << " mean.dist to truth: " << meanDAW << " CPU time: " << swAW.CpuTime(); - // LOG(debug) << "3-prongs with wgh.dist minization: eff= " << float(nfoundW) / NTest - // << " mean.dist to truth: " << meanDW << " CPU time: " << swW.CpuTime(); - // BOOST_CHECK(nfoundA > 0.99 * NTest); - // BOOST_CHECK(nfoundAW > 0.99 * NTest); - // BOOST_CHECK(nfoundW > 0.99 * NTest); - // BOOST_CHECK(meanDA < 0.1); - // BOOST_CHECK(meanDAW < 0.1); - // BOOST_CHECK(meanDW < 0.1); - // } + // 3 prongs vertices + { + std::vector forceQ{1, 1, 1}; + + o2::vertexing::DCAFitterN<3> ft; // 3 prong fitter + ft.setBz(bz); + ft.setPropagateToPCA(true); // After finding the vertex, propagate tracks to the DCA. This is default anyway + ft.setMaxR(200); // do not consider V0 seeds with 2D circles crossing above this R. This is default anyway + ft.setMaxDZIni(4); // do not consider V0 seeds with tracks Z-distance exceeding this. This is default anyway + ft.setMinParamChange(1e-3); // stop iterations if max correction is below this value. This is default anyway + ft.setMinRelChi2Change(0.9); // stop iterations if chi2 improves by less that this factor + + std::string treeName3A = "pr3a", treeName3AW = "pr3aw", treeName3W = "pr3w"; + TStopwatch swA, swAW, swW; + int nfoundA = 0, nfoundAW = 0, nfoundW = 0; + double meanDA = 0, meanDAW = 0, meanDW = 0; + swA.Stop(); + swAW.Stop(); + swW.Stop(); + for (int iev = 0; iev < NTest; iev++) { + auto genParent = generate(vtxGen, vctracks, bz, genPHS, dch, dchdec, forceQ); + + ft.setUseAbsDCA(true); + swA.Start(false); + int ncA = device::process(1, 1, ft, vctracks[0], vctracks[1], vctracks[2]); + swA.Stop(); + LOG(debug) << "fit abs.dist " << iev << " NC: " << ncA << " Chi2: " << (ncA ? ft.getChi2AtPCACandidate(0) : -1); + if (ncA) { + auto minD = checkResults(outStream, treeName3A, ft, vtxGen, genParent, dchdec); + meanDA += minD; + nfoundA++; + } + + ft.setUseAbsDCA(true); + ft.setWeightedFinalPCA(true); + swAW.Start(false); + int ncAW = device::process(1, 1, ft, vctracks[0], vctracks[1], vctracks[2]); // HERE WE FIT THE VERTICES + swAW.Stop(); + LOG(debug) << "fit abs.dist " << iev << " NC: " << ncAW << " Chi2: " << (ncAW ? ft.getChi2AtPCACandidate(0) : -1); + if (ncAW) { + auto minD = checkResults(outStream, treeName3AW, ft, vtxGen, genParent, dchdec); + meanDAW += minD; + nfoundAW++; + } + + ft.setUseAbsDCA(false); + ft.setWeightedFinalPCA(false); + swW.Start(false); + int ncW = device::process(1, 1, ft, vctracks[0], vctracks[1], vctracks[2]); // HERE WE FIT THE VERTICES + swW.Stop(); + LOG(debug) << "fit wgh.dist " << iev << " NC: " << ncW << " Chi2: " << (ncW ? ft.getChi2AtPCACandidate(0) : -1); + if (ncW) { + auto minD = checkResults(outStream, treeName3W, ft, vtxGen, genParent, dchdec); + meanDW += minD; + nfoundW++; + } + } + device::print(1, 1, ft); + meanDA /= nfoundA ? nfoundA : 1; + meanDAW /= nfoundAW ? nfoundAW : 1; + meanDW /= nfoundW ? nfoundW : 1; + LOG(info) << "Processed " << NTest << " 3-prong vertices"; + LOG(info) << "3-prongs with abs.dist minization: eff= " << float(nfoundA) / NTest + << " mean.dist to truth: " << meanDA << " CPU time: " << swA.CpuTime(); + LOG(info) << "3-prongs with abs.dist but wghPCA: eff= " << float(nfoundAW) / NTest + << " mean.dist to truth: " << meanDAW << " CPU time: " << swAW.CpuTime(); + LOG(info) << "3-prongs with wgh.dist minization: eff= " << float(nfoundW) / NTest + << " mean.dist to truth: " << meanDW << " CPU time: " << swW.CpuTime(); + BOOST_CHECK(nfoundA > 0.99 * NTest); + BOOST_CHECK(nfoundAW > 0.99 * NTest); + BOOST_CHECK(nfoundW > 0.99 * NTest); + BOOST_CHECK(meanDA < 0.1); + BOOST_CHECK(meanDAW < 0.1); + BOOST_CHECK(meanDW < 0.1); + } outStream.Close(); } diff --git a/Common/DCAFitter/GPU/hip/CMakeLists.txt b/Common/DCAFitter/GPU/hip/CMakeLists.txt index 39ddce02209ed..272d18a81bab4 100644 --- a/Common/DCAFitter/GPU/hip/CMakeLists.txt +++ b/Common/DCAFitter/GPU/hip/CMakeLists.txt @@ -9,7 +9,6 @@ # granted to it by virtue of its status as an Intergovernmental Organization # or submit itself to any jurisdiction. -add_compile_definitions(GPUCA_GPU_DEBUG_PRINT) set(CMAKE_HIP_FLAGS "${CMAKE_HIP_FLAGS} -fgpu-rdc") o2_add_hipified_library(DCAFitterHIP SOURCES ../cuda/DCAFitterN.cu diff --git a/Common/DCAFitter/include/DCAFitter/DCAFitterN.h b/Common/DCAFitter/include/DCAFitter/DCAFitterN.h index 732bf058bd013..4ab7098b87052 100644 --- a/Common/DCAFitter/include/DCAFitter/DCAFitterN.h +++ b/Common/DCAFitter/include/DCAFitter/DCAFitterN.h @@ -1136,31 +1136,21 @@ GPUdi() bool DCAFitterN::propagateToX(o2::track::TrackParCov& t, flo using DCAFitter2 = DCAFitterN<2, o2::track::TrackParCov>; using DCAFitter3 = DCAFitterN<3, o2::track::TrackParCov>; -#ifdef GPUCA_GPUCODE -namespace gpu::kernel -{ -GPUg() void printKernel(o2::vertexing::DCAFitterN<2>* ft); -GPUg() void processKernel(o2::vertexing::DCAFitterN<2>* ft, o2::track::TrackParCov* t1, o2::track::TrackParCov* t2, int* res); -} // namespace gpu::kernel -#endif + namespace device { -void print(o2::vertexing::DCAFitterN<2>&, - const int nBlocks = 1, - const int nThreads = 1); - -int process(o2::vertexing::DCAFitterN<2>&, - o2::track::TrackParCov&, - o2::track::TrackParCov&, - const int nBlocks = 1, - const int nThreads = 1); - -template -int process(o2::vertexing::DCAFitterN<2>&, - const int nBlocks = 1, - const int nThreads = 1, - Args&... args); +template +void print(const int nBlocks, + const int nThreads, + Fitter& ft); + +template +int process(const int nBlocks, + const int nThreads, + Fitter&, + Tr&... args); } // namespace device + } // namespace vertexing } // namespace o2 #endif // _ALICEO2_DCA_FITTERN_