Skip to content
This repository has been archived by the owner on Dec 9, 2024. It is now read-only.

Clean up N_MAX objects #228

Merged
merged 6 commits into from
Jan 4, 2023
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 0 additions & 5 deletions SDL/Constants.h
Original file line number Diff line number Diff line change
Expand Up @@ -52,16 +52,11 @@ typedef float FPX_seg;

const unsigned int MAX_BLOCKS = 80;
const unsigned int MAX_CONNECTED_MODULES = 40;
const unsigned int N_MAX_MD_PER_MODULES = 89;
const unsigned int N_MAX_SEGMENTS_PER_MODULE = 537; //Also used in (pixel)trackletDefualtAlgo in header files-> change there as well (hard coded)
const unsigned int N_MAX_PIXEL_MD_PER_MODULES = 100000;
const unsigned int N_MAX_PIXEL_SEGMENTS_PER_MODULE = 50000;

const unsigned int N_MAX_TRIPLETS_PER_MODULE = 1170;
const unsigned int N_MAX_QUINTUPLETS_PER_MODULE = 513;
const unsigned int N_MAX_PIXEL_TRIPLETS = 5000;
const unsigned int N_MAX_PIXEL_QUINTUPLETS = 15000;
const unsigned int N_MAX_TOTAL_TRIPLETS = 200000;

const unsigned int N_MAX_TRACK_CANDIDATES = 1000;
const unsigned int N_MAX_PIXEL_TRACK_CANDIDATES = 4000;
Expand Down
16 changes: 6 additions & 10 deletions SDL/Event.cu
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ struct SDL::modules* SDL::modulesInGPU = nullptr;
struct SDL::pixelMap* SDL::pixelMapping = nullptr;
uint16_t SDL::nModules;
uint16_t SDL::nLowerModules;
unsigned int nTotalSegments;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I can now see that you don't duplicate the creation of segment arrays, taking into account my comment (thank you, I will resolve that discussion). Because of this, I understand that you had to define the nTotalSegments in a more global scope, so that you can use it in multiple functions within the Event class.

However, you define it as a global variable and this can change from event to event. If we run multi-streaming, I would expect it to be changed by all of the events that run concurrently, leading to unexpected behavior, since all of them will have different values for that same variable. To save ourselves from this situation, I would make the nTotalSegments variable a private variable of the Event class (like this one), so that each event can have its own value.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks a lot for pointing it out Manos! I've put this into Event class to correct this :)


SDL::Event::Event(cudaStream_t estream)
{
Expand Down Expand Up @@ -749,8 +750,7 @@ void SDL::Event::addPixelSegmentToEvent(std::vector<unsigned int> hitIndices0,st
mdsInGPU = (SDL::miniDoublets*)cms::cuda::allocate_host(sizeof(SDL::miniDoublets), stream);
//hardcoded range numbers for this will come from studies!
unsigned int nTotalMDs;
createMDArrayRanges(*modulesInGPU, *rangesInGPU, nLowerModules, nTotalMDs, stream, N_MAX_MD_PER_MODULES, N_MAX_PIXEL_MD_PER_MODULES);

createMDArrayRanges(*modulesInGPU, *rangesInGPU, nLowerModules, nTotalMDs, stream, N_MAX_PIXEL_MD_PER_MODULES);
createMDsInExplicitMemory(*mdsInGPU, nTotalMDs, nLowerModules, N_MAX_PIXEL_MD_PER_MODULES,stream);

cudaMemcpyAsync(mdsInGPU->nMemoryLocations, &nTotalMDs, sizeof(unsigned int), cudaMemcpyHostToDevice, stream);
Expand All @@ -761,10 +761,8 @@ void SDL::Event::addPixelSegmentToEvent(std::vector<unsigned int> hitIndices0,st
{
segmentsInGPU = (SDL::segments*)cms::cuda::allocate_host(sizeof(SDL::segments), stream);
//hardcoded range numbers for this will come from studies!
unsigned int nTotalSegments;
createSegmentArrayRanges(*modulesInGPU, *rangesInGPU, *mdsInGPU, nLowerModules, nTotalSegments, stream, N_MAX_SEGMENTS_PER_MODULE, N_MAX_PIXEL_SEGMENTS_PER_MODULE);
// cout<<"nTotalSegments: "<<nTotalSegments<<std::endl; // for memory usage

//problem here: didn't distinguish pixel segments and outtracker segments. so they use the same memory index, which should be different and allocate dynamically
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please, check the discussion on the same line from the previous PR.

createSegmentArrayRanges(*modulesInGPU, *rangesInGPU, *mdsInGPU, nLowerModules, nTotalSegments, stream, N_MAX_PIXEL_SEGMENTS_PER_MODULE);
createSegmentsInExplicitMemory(*segmentsInGPU, nTotalSegments, nLowerModules, N_MAX_PIXEL_SEGMENTS_PER_MODULE,stream);

cudaMemcpyAsync(segmentsInGPU->nMemoryLocations, &nTotalSegments, sizeof(unsigned int), cudaMemcpyHostToDevice, stream);;
Expand Down Expand Up @@ -1055,7 +1053,7 @@ void SDL::Event::createMiniDoublets()

//hardcoded range numbers for this will come from studies!
unsigned int nTotalMDs;
createMDArrayRanges(*modulesInGPU, *rangesInGPU, nLowerModules, nTotalMDs, stream, N_MAX_MD_PER_MODULES, N_MAX_PIXEL_MD_PER_MODULES);
createMDArrayRanges(*modulesInGPU, *rangesInGPU, nLowerModules, nTotalMDs, stream, N_MAX_PIXEL_MD_PER_MODULES);
// cout<<"nTotalMDs: "<<nTotalMDs<<std::endl; // for memory usage

if(mdsInGPU == nullptr)
Expand Down Expand Up @@ -1128,13 +1126,12 @@ void SDL::Event::createSegmentsWithModuleMap()
if(segmentsInGPU == nullptr)
{
segmentsInGPU = (SDL::segments*)cms::cuda::allocate_host(sizeof(SDL::segments), stream);
createSegmentsInExplicitMemory(*segmentsInGPU, N_MAX_SEGMENTS_PER_MODULE, nLowerModules, N_MAX_PIXEL_SEGMENTS_PER_MODULE,stream);
createSegmentsInExplicitMemory(*segmentsInGPU, nTotalSegments, nLowerModules, N_MAX_PIXEL_SEGMENTS_PER_MODULE,stream);
}

//HERE
dim3 cSnThreads(64,1,1);
uint32_t blks = nLowerModules;
//printf("HERE Num nLowerModules=%d Blks=%d\n",nLowerModules,blks);
dim3 cSnBlocks(blks,1,1);
SDL::createSegmentsInGPUv2<<<cSnBlocks,cSnThreads,0,stream>>>(*modulesInGPU, *mdsInGPU, *segmentsInGPU, *rangesInGPU);
cudaError_t cudaerr = cudaGetLastError();
Expand All @@ -1146,7 +1143,6 @@ void SDL::Event::createSegmentsWithModuleMap()
#if defined(AddObjects)
addSegmentsToEventExplicit();
#endif

}


Expand Down
5 changes: 1 addition & 4 deletions SDL/MiniDoublet.cu
Original file line number Diff line number Diff line change
Expand Up @@ -10,15 +10,14 @@
void SDL::miniDoublets::resetMemory(unsigned int nMemoryLocationsx, unsigned int nLowerModules,cudaStream_t stream)

{
//unsigned int nMemoryLocations = maxMDsPerModule * nLowerModules + maxPixelMDs;
cudaMemsetAsync(anchorHitIndices,0, nMemoryLocationsx * 3 * sizeof(unsigned int),stream);
cudaMemsetAsync(dphichanges,0, nMemoryLocationsx * 9 * sizeof(float),stream);
cudaMemsetAsync(nMDs,0, (nLowerModules + 1) * sizeof(unsigned int),stream);
cudaMemsetAsync(totOccupancyMDs,0, (nLowerModules + 1) * sizeof(unsigned int),stream);
}


void SDL::createMDArrayRanges(struct modules& modulesInGPU, struct objectRanges& rangesInGPU, uint16_t& nLowerModules, unsigned int& nTotalMDs, cudaStream_t stream, const unsigned int& maxMDsPerModule, const unsigned int& maxPixelMDs)
void SDL::createMDArrayRanges(struct modules& modulesInGPU, struct objectRanges& rangesInGPU, uint16_t& nLowerModules, unsigned int& nTotalMDs, cudaStream_t stream, const unsigned int& maxPixelMDs)
{
/*
write code here that will deal with importing module parameters to CPU, and get the relevant occupancies for a given module!*/
Expand Down Expand Up @@ -69,8 +68,6 @@ void SDL::createMDArrayRanges(struct modules& modulesInGPU, struct objectRanges&
if (category_number == 3 && eta_number == 2) occupancy = 20;
if (category_number == 3 && eta_number == 3) occupancy = 25;

// occupancy = maxMDsPerModule;

nTotalMDs += occupancy;
}

Expand Down
2 changes: 1 addition & 1 deletion SDL/MiniDoublet.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ namespace SDL
void createMDsInExplicitMemory(struct miniDoublets& mdsInGPU, unsigned int maxMDs,uint16_t nLowerModules, unsigned int maxPixelMDs,cudaStream_t stream);


void createMDArrayRanges(struct modules& modulesInGPU, struct objectRanges& rangesInGPU, uint16_t& nLowerModules, unsigned int& nTotalMDs, cudaStream_t stream, const unsigned int& maxMDsPerModule, const unsigned int& maxPixelMDs);
void createMDArrayRanges(struct modules& modulesInGPU, struct objectRanges& rangesInGPU, uint16_t& nLowerModules, unsigned int& nTotalMDs, cudaStream_t stream, const unsigned int& maxPixelMDs);


//#ifdef CUT_VALUE_DEBUG
Expand Down
3 changes: 0 additions & 3 deletions SDL/Quintuplet.cu
Original file line number Diff line number Diff line change
Expand Up @@ -174,15 +174,12 @@ __global__ void SDL::createEligibleModulesListForQuintupletsGPU(struct modules&
if (category_number == 3 && eta_number == 3) occupancy = 106;

unsigned int nTotQ = atomicAdd(&nTotalQuintupletsx,occupancy);
// if (nTotQ == 0) printf("%u\n",occupancy);
// rangesInGPU.quintupletModuleIndices[i] = nTotQ-occupancy;
rangesInGPU.quintupletModuleIndices[i] = nTotQ;
rangesInGPU.indicesOfEligibleT5Modules[nEligibleT5Modules] = i;
}
__syncthreads();
if(threadIdx.x==0){
*rangesInGPU.nEligibleT5Modules = static_cast<uint16_t>(nEligibleT5Modulesx);
// printf("nTotalT5 %u\n",nTotalQuintupletsx);
*device_nTotalQuintuplets = nTotalQuintupletsx;
}
}
Expand Down
2 changes: 1 addition & 1 deletion SDL/Segment.cu
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ void SDL::segments::resetMemory(unsigned int nMemoryLocationsx, unsigned int nLo
}


void SDL::createSegmentArrayRanges(struct modules& modulesInGPU, struct objectRanges& rangesInGPU, struct miniDoublets& mdsInGPU, uint16_t& nLowerModules, unsigned int& nTotalSegments, cudaStream_t stream, const uint16_t& maxSegmentsPerModule, const uint16_t& maxPixelSegments)
void SDL::createSegmentArrayRanges(struct modules& modulesInGPU, struct objectRanges& rangesInGPU, struct miniDoublets& mdsInGPU, uint16_t& nLowerModules, unsigned int& nTotalSegments, cudaStream_t stream, const uint16_t& maxPixelSegments)
{
/*
write code here that will deal with importing module parameters to CPU, and get the relevant occupancies for a given module!*/
Expand Down
3 changes: 1 addition & 2 deletions SDL/Segment.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -73,8 +73,7 @@ namespace SDL

void createSegmentsInExplicitMemory(struct segments& segmentsInGPU, unsigned int maxSegments, uint16_t nLowerModules, unsigned int maxPixelSegments,cudaStream_t stream);

void createSegmentArrayRanges(struct modules& modulesInGPU, struct objectRanges& rangesInGPU, struct miniDoublets& mdsinGPU, uint16_t& nLowerModules, unsigned int& nSegments, cudaStream_t stream, const uint16_t& maxSegmentsPerModule, const uint16_t& maxPixelSegments);

void createSegmentArrayRanges(struct modules& modulesInGPU, struct objectRanges& rangesInGPU, struct miniDoublets& mdsinGPU, uint16_t& nLowerModules, unsigned int& nSegments, cudaStream_t stream, const uint16_t& maxPixelSegments);

CUDA_DEV void dAlphaThreshold(float* dAlphaThresholdValues, struct modules& modulesInGPU, struct miniDoublets& mdsInGPU, float& xIn, float& yIn, float& zIn, float& rtIn, float& xOut, float& yOut, float& zOut, float& rtOut, uint16_t& innerLowerModuleIndex, uint16_t& outerLowerModuleIndex, unsigned int& innerMDIndex, unsigned int& outerMDIndex);
CUDA_DEV void addSegmentToMemory(struct segments& segmentsInGPU, unsigned int lowerMDIndex, unsigned int upperMDIndex, uint16_t innerLowerModuleIndex, uint16_t outerLowerModuleIndex, unsigned int innerMDAnchorHitIndex, unsigned int outerMDAnchorHitIndex, float& dPhi, float& dPhiMin, float& dPhiMax, float& dPhiChange, float& dPhiChangeMin, float& dPhiChangeMax, unsigned int idx);
Expand Down
22 changes: 5 additions & 17 deletions code/core/write_sdl_ntuple.cc
Original file line number Diff line number Diff line change
@@ -1,11 +1,5 @@
#include "write_sdl_ntuple.h"

#define N_MAX_MD_PER_MODULES 89
#define N_MAX_SEGMENTS_PER_MODULE 537
#define MAX_NTRIPLET_PER_MODULE 1170
#define MAX_NQUINTUPLET_PER_MODULE 513


//________________________________________________________________________________________________________________________________
void createOutputBranches()
{
Expand Down Expand Up @@ -530,20 +524,14 @@ void printMDs(SDL::Event* event)
SDL::miniDoublets& miniDoubletsInGPU = (*event->getMiniDoublets());
SDL::hits& hitsInGPU = (*event->getHits());
SDL::modules& modulesInGPU = (*event->getModules());
SDL::objectRanges& rangesInGPU = (*event->getRanges());

// Then obtain the lower module index
for (unsigned int idx = 0; idx <= *(modulesInGPU.nLowerModules); ++idx)
{
for (unsigned int jdx = 0; jdx < miniDoubletsInGPU.nMDs[2*idx]; jdx++)
{
unsigned int mdIdx = (2*idx) * N_MAX_MD_PER_MODULES + jdx;
unsigned int LowerHitIndex = miniDoubletsInGPU.anchorHitIndices[mdIdx];
unsigned int UpperHitIndex = miniDoubletsInGPU.outerHitIndices[mdIdx];
unsigned int hit0 = hitsInGPU.idxs[LowerHitIndex];
unsigned int hit1 = hitsInGPU.idxs[UpperHitIndex];
std::cout << "VALIDATION 'MD': " << "MD" << " hit0: " << hit0 << " hit1: " << hit1 << std::endl;
}
for (unsigned int jdx = 0; jdx < miniDoubletsInGPU.nMDs[2*idx+1]; jdx++)
for (unsigned int iMD = 0; iMD < miniDoubletsInGPU.nMDs[idx]; iMD++)
{
unsigned int mdIdx = (2*idx+1) * N_MAX_MD_PER_MODULES + jdx;
unsigned int mdIdx = rangesInGPU.miniDoubletModuleIndices[idx] + iMD;
unsigned int LowerHitIndex = miniDoubletsInGPU.anchorHitIndices[mdIdx];
unsigned int UpperHitIndex = miniDoubletsInGPU.outerHitIndices[mdIdx];
unsigned int hit0 = hitsInGPU.idxs[LowerHitIndex];
Expand Down