Skip to content

Commit

Permalink
Update methods
Browse files Browse the repository at this point in the history
  • Loading branch information
mconcas committed Sep 3, 2024
1 parent 362bb85 commit 0028831
Show file tree
Hide file tree
Showing 3 changed files with 5 additions and 7 deletions.
4 changes: 1 addition & 3 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,7 @@ cmake_minimum_required(VERSION 3.27.1 FATAL_ERROR)
project(O2 LANGUAGES C CXX VERSION 1.2.0)

include(CTest)
add_definitions(-DGPUCA_NO_FAST_MATH=1)
set(GPUCA_NO_FAST_MATH 1)
set(GPUCA_NO_FAST_MATH_WHOLEO2 1)

# Project wide setup

# Would better fit inside GPU/CMakeLists.txt, but include GPU/Common directly
Expand Down
6 changes: 3 additions & 3 deletions Detectors/ITSMFT/ITS/tracking/GPU/cuda/TimeFrameGPU.cu
Original file line number Diff line number Diff line change
Expand Up @@ -418,8 +418,8 @@ void TimeFrameGPU<nLayers>::initDevice(IndexTableUtils* utils,
// mVerticesInChunks.resize(mGpuParams.nTimeFrameChunks);
// mNVerticesInChunks.resize(mGpuParams.nTimeFrameChunks);
// mLabelsInChunks.resize(mGpuParams.nTimeFrameChunks);
// LOGP(debug, "Size of fixed part is: {} MB", GpuTimeFrameChunk<nLayers>::computeFixedSizeBytes(mGpuParams) / MB);
// LOGP(debug, "Size of scaling part is: {} MB", GpuTimeFrameChunk<nLayers>::computeScalingSizeBytes(GpuTimeFrameChunk<nLayers>::computeRofPerChunk(mGpuParams, mAvailMemGB), mGpuParams) / MB);
// LOGP(info, "Size of fixed part is: {} MB", GpuTimeFrameChunk<nLayers>::computeFixedSizeBytes(mGpuParams) / MB);
// LOGP(info, "Size of scaling part is: {} MB", GpuTimeFrameChunk<nLayers>::computeScalingSizeBytes(GpuTimeFrameChunk<nLayers>::computeRofPerChunk(mGpuParams, mAvailMemGB), mGpuParams) / MB);
// LOGP(info, "Allocating {} chunks of {} rofs capacity each.", mGpuParams.nTimeFrameChunks, mGpuParams.nROFsPerChunk);

// for (int iChunk{0}; iChunk < mMemChunks.size(); ++iChunk) {
Expand Down Expand Up @@ -571,7 +571,7 @@ template <int nLayers>
void TimeFrameGPU<nLayers>::downloadTrackITSExtDevice(std::vector<CellSeed>& seeds)
{
LOGP(debug, "gpu-transfer: downloading {} tracks, for {} MB.", mTrackITSExt.size(), mTrackITSExt.size() * sizeof(o2::its::TrackITSExt) / MB);
checkGPUError(cudaMemcpyAsync(mTrackITSExt.data(), mTrackITSExtDevice, mTrackITSExt.size() * sizeof(o2::its::TrackITSExt), cudaMemcpyDeviceToHost, mGpuStreams[0].get()));
checkGPUError(cudaMemcpyAsync(mTrackITSExt.data(), mTrackITSExtDevice, seeds.size() * sizeof(o2::its::TrackITSExt), cudaMemcpyDeviceToHost, mGpuStreams[0].get()));
checkGPUError(cudaHostUnregister(mTrackITSExt.data()));
checkGPUError(cudaHostUnregister(seeds.data()));
discardResult(cudaDeviceSynchronize());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -380,7 +380,7 @@ void TrackerTraitsGPU<nLayers>::findRoads(const int iteration)
for (int iLayer{startLayer - 1}; iLayer > 0 && level > 2; --iLayer) {
lastCellSeed.swap(updatedCellSeed);
lastCellId.swap(updatedCellId);
updatedCellSeed.clear();
std::vector<CellSeed>().swap(updatedCellSeed); /// tame the memory peaks
updatedCellId.clear();
processNeighbours(iLayer, --level, lastCellSeed, lastCellId, updatedCellSeed, updatedCellId);
}
Expand Down

0 comments on commit 0028831

Please sign in to comment.