diff --git a/SDL/Event.cu b/SDL/Event.cu index b2d5572d..928cbbf4 100644 --- a/SDL/Event.cu +++ b/SDL/Event.cu @@ -65,7 +65,6 @@ SDL::Event::Event(cudaStream_t estream) n_quintuplets_by_layer_endcap_[i] = 0; } } - //resetObjectsInModule(); } SDL::Event::~Event() @@ -506,7 +505,6 @@ void SDL::initModules(const char* moduleMetaDataFilePath) //resetObjectRanges(*modulesInGPU,nModules, default_stream); } - void SDL::cleanModules() { //cudaStream_t default_stream = 0; @@ -886,10 +884,10 @@ void SDL::Event::addMiniDoubletsToEvent() { n_minidoublets_by_layer_endcap_[modulesInGPU->layers[i] - 1] += mdsInGPU->nMDs[i]; } - } } } + void SDL::Event::addMiniDoubletsToEventExplicit() { uint16_t nLowerModules; @@ -949,6 +947,7 @@ void SDL::Event::addMiniDoubletsToEventExplicit() cms::cuda::free_host(module_hitRanges); cms::cuda::free_host(module_miniDoubletModuleIndices); } + void SDL::Event::addSegmentsToEvent() { for(unsigned int i = 0; i<*(SDL::modulesInGPU->nLowerModules); i++) @@ -975,6 +974,7 @@ void SDL::Event::addSegmentsToEvent() } } } + void SDL::Event::addSegmentsToEventExplicit() { uint16_t nLowerModules; @@ -1040,7 +1040,6 @@ void SDL::Event::createMiniDoublets() //hardcoded range numbers for this will come from studies! unsigned int nTotalMDs; createMDArrayRanges(*modulesInGPU, *rangesInGPU, nLowerModules, nTotalMDs, stream, N_MAX_PIXEL_MD_PER_MODULES); -// cout<<"nTotalMDs: "<nMemoryLocations, &maxTriplets, sizeof(unsigned int), cudaMemcpyHostToDevice, stream); cudaStreamSynchronize(stream); - } //TODO:Move this also inside the ranges function uint16_t nonZeroModules=0; @@ -1194,7 +1190,6 @@ void SDL::Event::createTriplets() cudaMemcpyAsync(index_gpu, index, nonZeroModules*sizeof(uint16_t), cudaMemcpyHostToDevice,stream); cudaStreamSynchronize(stream); - // Temporary fix for queue initialization. 
QueueAcc queue(devAcc); @@ -1300,7 +1295,7 @@ void SDL::Event::createTrackCandidates() }cudaStreamSynchronize(stream); unsigned int nThreadsx_pLS = 384; - unsigned int nBlocksx_pLS = MAX_BLOCKS;//(20000) % nThreadsx_pLS == 0 ? 20000 / nThreadsx_pLS : 20000 / nThreadsx_pLS + 1; + unsigned int nBlocksx_pLS = MAX_BLOCKS; SDL::addpLSasTrackCandidateInGPU<<>>(*modulesInGPU, *trackCandidatesInGPU, *segmentsInGPU); cudaError_t cudaerr_pLSTC = cudaGetLastError(); if(cudaerr_pLSTC != cudaSuccess) @@ -1345,13 +1340,12 @@ void SDL::Event::createPixelTriplets() connectedPixelSize_dev = (unsigned int*)cms::cuda::allocate_device(dev, nInnerSegments*sizeof(unsigned int), stream); connectedPixelIndex_dev = (unsigned int*)cms::cuda::allocate_device(dev, nInnerSegments*sizeof(unsigned int), stream); - // unsigned int max_size =0; cudaStreamSynchronize(stream); int pixelIndexOffsetPos = pixelMapping->connectedPixelsIndex[44999] + pixelMapping->connectedPixelsSizes[44999]; int pixelIndexOffsetNeg = pixelMapping->connectedPixelsIndexPos[44999] + pixelMapping->connectedPixelsSizes[44999] + pixelIndexOffsetPos; // TODO: check if a map/reduction to just eligible pLSs would speed up the kernel - // the current selection still leaves a significant fraction of unmatchable pLSs + // the current selection still leaves a significant fraction of unmatchable pLSs for (unsigned int i = 0; i < nInnerSegments; i++) {// loop over # pLS int8_t pixelType = pixelTypes[i];// get pixel type for this pLS @@ -1432,7 +1426,6 @@ void SDL::Event::createPixelTriplets() #endif //pT3s can be cleaned here because they're not used in making pT5s! 
- //dim3 nThreads_dup(160,1,1); dim3 nThreads_dup(32,32,1); dim3 nBlocks_dup(1,40,1); //seems like more blocks lead to conflicting writes removeDupPixelTripletsInGPUFromMap<<>>(*pixelTripletsInGPU,false); @@ -1443,27 +1436,47 @@ void SDL::Event::createQuintuplets() { uint16_t nLowerModules; cudaMemcpyAsync(&nLowerModules,modulesInGPU->nLowerModules,sizeof(uint16_t),cudaMemcpyDeviceToHost,stream); -cudaStreamSynchronize(stream); + cudaStreamSynchronize(stream); uint16_t nEligibleT5Modules = 0; - //uint16_t *indicesOfEligibleModules = (uint16_t*)malloc(nLowerModules*sizeof(uint16_t)); #ifdef CACHE_ALLOC - rangesInGPU->indicesOfEligibleT5Modules = (uint16_t*)cms::cuda::allocate_device(dev, nLowerModules * sizeof(uint16_t), stream); + rangesInGPU->indicesOfEligibleT5Modules = (uint16_t*)cms::cuda::allocate_device(dev, nLowerModules * sizeof(uint16_t), stream); #else - cudaMalloc(&(rangesInGPU->indicesOfEligibleT5Modules), nLowerModules * sizeof(uint16_t)); + cudaMalloc(&(rangesInGPU->indicesOfEligibleT5Modules), nLowerModules * sizeof(uint16_t)); #endif cudaMemsetAsync(rangesInGPU->quintupletModuleIndices, -1, sizeof(int) * (nLowerModules),stream); -cudaStreamSynchronize(stream); + cudaStreamSynchronize(stream); unsigned int nTotalQuintuplets; unsigned int *device_nTotalQuintuplets; cudaMalloc((void **)&device_nTotalQuintuplets, sizeof(unsigned int)); - createEligibleModulesListForQuintupletsGPU<<<1,1024,0,stream>>>(*modulesInGPU, *tripletsInGPU, device_nTotalQuintuplets, stream, *rangesInGPU); -cudaStreamSynchronize(stream); + + // Temporary fix for queue initialization. 
+ QueueAcc queue(devAcc); + + Vec const threadsPerBlockCreateQuints(static_cast(1), static_cast(1), static_cast(1024)); + Vec const blocksPerGridCreateQuints(static_cast(1), static_cast(1), static_cast(1)); + + WorkDiv const createEligibleModulesListForQuintupletsGPU_workDiv(blocksPerGridCreateQuints, threadsPerBlockCreateQuints, elementsPerThread); + + SDL::createEligibleModulesListForQuintupletsGPU createEligibleModulesListForQuintupletsGPU_kernel; + auto const createEligibleModulesListForQuintupletsGPUTask(alpaka::createTaskKernel( + createEligibleModulesListForQuintupletsGPU_workDiv, + createEligibleModulesListForQuintupletsGPU_kernel, + *modulesInGPU, + *tripletsInGPU, + *rangesInGPU, + device_nTotalQuintuplets)); + + alpaka::enqueue(queue, createEligibleModulesListForQuintupletsGPUTask); + alpaka::wait(queue); + cudaMemcpyAsync(&nEligibleT5Modules,rangesInGPU->nEligibleT5Modules,sizeof(uint16_t),cudaMemcpyDeviceToHost,stream); cudaMemcpyAsync(&nTotalQuintuplets,device_nTotalQuintuplets,sizeof(unsigned int),cudaMemcpyDeviceToHost,stream); + cudaStreamSynchronize(stream); + cudaFree(device_nTotalQuintuplets); -cudaStreamSynchronize(stream); + cudaStreamSynchronize(stream); if(quintupletsInGPU == nullptr) { @@ -1471,23 +1484,27 @@ cudaStreamSynchronize(stream); createQuintupletsInExplicitMemory(*quintupletsInGPU, nTotalQuintuplets, nLowerModules, nEligibleT5Modules,stream); cudaMemcpyAsync(quintupletsInGPU->nMemoryLocations, &nTotalQuintuplets, sizeof(unsigned int), cudaMemcpyHostToDevice, stream); cudaStreamSynchronize(stream); - } -cudaStreamSynchronize(stream); + Vec const threadsPerBlockQuints(static_cast(1), static_cast(8), static_cast(32)); + Vec const blocksPerGridQuints(static_cast(max(nEligibleT5Modules,1)), static_cast(1), static_cast(1)); + WorkDiv const createQuintupletsInGPUv2_workDiv(blocksPerGridQuints, threadsPerBlockQuints, elementsPerThread); - dim3 nThreads(32, 8, 1); - dim3 nBlocks(1,1,max(nEligibleT5Modules,1)); + 
SDL::createQuintupletsInGPUv2 createQuintupletsInGPUv2_kernel; + auto const createQuintupletsInGPUv2Task(alpaka::createTaskKernel( + createQuintupletsInGPUv2_workDiv, + createQuintupletsInGPUv2_kernel, + *modulesInGPU, + *mdsInGPU, + *segmentsInGPU, + *tripletsInGPU, + *quintupletsInGPU, + *rangesInGPU, + nEligibleT5Modules)); - SDL::createQuintupletsInGPUv2<<>>(*modulesInGPU, *mdsInGPU, *segmentsInGPU, *tripletsInGPU, *quintupletsInGPU, *rangesInGPU,nEligibleT5Modules); - cudaError_t cudaerr = cudaGetLastError(); - if(cudaerr != cudaSuccess) - { - std::cout<<"sync failed with error : "<nQuintuplets, nLowerModules * sizeof(unsigned int), cudaMemcpyDeviceToHost,stream); + nQuintuplets = (int*)cms::cuda::allocate_host(nLowerModules * sizeof(int), stream); + cudaMemcpyAsync(nQuintuplets, quintupletsInGPU->nQuintuplets, nLowerModules * sizeof(int), cudaMemcpyDeviceToHost,stream); superbins = (int*)cms::cuda::allocate_host(N_MAX_PIXEL_SEGMENTS_PER_MODULE*sizeof(int), stream); pixelTypes = (int8_t*)cms::cuda::allocate_host(N_MAX_PIXEL_SEGMENTS_PER_MODULE*sizeof(int8_t), stream); cudaMemcpyAsync(superbins,segmentsInGPU->superbin,N_MAX_PIXEL_SEGMENTS_PER_MODULE*sizeof(int),cudaMemcpyDeviceToHost,stream); cudaMemcpyAsync(pixelTypes,segmentsInGPU->pixelType,N_MAX_PIXEL_SEGMENTS_PER_MODULE*sizeof(int8_t),cudaMemcpyDeviceToHost,stream); - + cudaStreamSynchronize(stream); pixelModuleIndex = nLowerModules; unsigned int nInnerSegments = 0; @@ -1598,7 +1613,7 @@ void SDL::Event::createPixelQuintuplets() cudaMemcpyAsync(connectedPixelIndex_dev, connectedPixelIndex_host, nInnerSegments*sizeof(unsigned int), cudaMemcpyHostToDevice,stream); cudaStreamSynchronize(stream); - // Temporary fix for queue initialization. + // Temporary fix for queue initialization. 
QueueAcc queue(devAcc); Vec const threadsPerBlock(static_cast(1), static_cast(16), static_cast(16)); @@ -1634,15 +1649,16 @@ void SDL::Event::createPixelQuintuplets() dim3 nThreads_dup(32,32,1); dim3 nBlocks_dup(1,MAX_BLOCKS,1); - //printf("run dup pT5\n"); + removeDupPixelQuintupletsInGPUFromMap<<>>(*pixelQuintupletsInGPU, false); cudaError_t cudaerr2 = cudaGetLastError(); if(cudaerr2 != cudaSuccess) { std::cout<<"sync failed with error : "<>>(*modulesInGPU, *pixelQuintupletsInGPU, *trackCandidatesInGPU, *segmentsInGPU, *tripletsInGPU,*quintupletsInGPU); cudaError_t cudaerr_pT5 = cudaGetLastError(); @@ -1691,10 +1707,9 @@ void SDL::Event::addQuintupletsToEventExplicit() cudaMemcpyAsync(&nLowerModules,modulesInGPU->nLowerModules,sizeof(uint16_t),cudaMemcpyDeviceToHost,stream); cudaStreamSynchronize(stream); - unsigned int* nQuintupletsCPU; - nQuintupletsCPU = (unsigned int*)cms::cuda::allocate_host(nLowerModules * sizeof(unsigned int), stream); - - cudaMemcpyAsync(nQuintupletsCPU,quintupletsInGPU->nQuintuplets,nLowerModules*sizeof(unsigned int),cudaMemcpyDeviceToHost,stream); + int* nQuintupletsCPU; + nQuintupletsCPU = (int*)cms::cuda::allocate_host(nLowerModules * sizeof(int), stream); + cudaMemcpyAsync(nQuintupletsCPU,quintupletsInGPU->nQuintuplets,nLowerModules*sizeof(int),cudaMemcpyDeviceToHost,stream); short* module_subdets; module_subdets = (short*)cms::cuda::allocate_host(nModules* sizeof(short), stream); @@ -1708,7 +1723,7 @@ void SDL::Event::addQuintupletsToEventExplicit() int* module_quintupletModuleIndices; module_quintupletModuleIndices = (int*)cms::cuda::allocate_host(nLowerModules * sizeof(int), stream); cudaMemcpyAsync(module_quintupletModuleIndices, rangesInGPU->quintupletModuleIndices, nLowerModules * sizeof(int), cudaMemcpyDeviceToHost,stream); -cudaStreamSynchronize(stream); + cudaStreamSynchronize(stream); for(uint16_t i = 0; inTrackCandidates, sizeof(unsigned int), cudaMemcpyDeviceToHost,stream); -cudaStreamSynchronize(stream); + 
cudaStreamSynchronize(stream); return nTrackCandidates; } @@ -2020,7 +2031,7 @@ unsigned int SDL::Event::getNumberOfPT5TrackCandidates() { unsigned int nTrackCandidatesPT5; cudaMemcpyAsync(&nTrackCandidatesPT5, trackCandidatesInGPU->nTrackCandidatespT5, sizeof(unsigned int), cudaMemcpyDeviceToHost,stream); -cudaStreamSynchronize(stream); + cudaStreamSynchronize(stream); return nTrackCandidatesPT5; } @@ -2029,7 +2040,7 @@ unsigned int SDL::Event::getNumberOfPT3TrackCandidates() { unsigned int nTrackCandidatesPT3; cudaMemcpyAsync(&nTrackCandidatesPT3, trackCandidatesInGPU->nTrackCandidatespT3, sizeof(unsigned int), cudaMemcpyDeviceToHost,stream); -cudaStreamSynchronize(stream); + cudaStreamSynchronize(stream); return nTrackCandidatesPT3; } @@ -2038,7 +2049,7 @@ unsigned int SDL::Event::getNumberOfPLSTrackCandidates() { unsigned int nTrackCandidatesPLS; cudaMemcpyAsync(&nTrackCandidatesPLS, trackCandidatesInGPU->nTrackCandidatespLS, sizeof(unsigned int), cudaMemcpyDeviceToHost,stream); -cudaStreamSynchronize(stream); + cudaStreamSynchronize(stream); return nTrackCandidatesPLS; } @@ -2049,7 +2060,7 @@ unsigned int SDL::Event::getNumberOfPixelTrackCandidates() unsigned int nTrackCandidatesT5; cudaMemcpyAsync(&nTrackCandidates, trackCandidatesInGPU->nTrackCandidates, sizeof(unsigned int), cudaMemcpyDeviceToHost,stream); cudaMemcpyAsync(&nTrackCandidatesT5, trackCandidatesInGPU->nTrackCandidatesT5, sizeof(unsigned int), cudaMemcpyDeviceToHost,stream); -cudaStreamSynchronize(stream); + cudaStreamSynchronize(stream); return nTrackCandidates - nTrackCandidatesT5; } @@ -2087,6 +2098,7 @@ SDL::hits* SDL::Event::getHits() //std::shared_ptr should take care of garbage c } return hitsInCPU; } + SDL::objectRanges* SDL::Event::getRanges() { uint16_t nLowerModules; @@ -2105,7 +2117,7 @@ SDL::objectRanges* SDL::Event::getRanges() cudaMemcpyAsync(rangesInCPU->miniDoubletModuleIndices, rangesInGPU->miniDoubletModuleIndices, (nLowerModules + 1) * sizeof(int), cudaMemcpyDeviceToHost, 
stream); cudaMemcpyAsync(rangesInCPU->segmentModuleIndices, rangesInGPU->segmentModuleIndices, (nLowerModules + 1) * sizeof(int), cudaMemcpyDeviceToHost, stream); cudaMemcpyAsync(rangesInCPU->tripletModuleIndices, rangesInGPU->tripletModuleIndices, nLowerModules * sizeof(int), cudaMemcpyDeviceToHost, stream); -cudaStreamSynchronize(stream); + cudaStreamSynchronize(stream); } return rangesInCPU; } @@ -2197,7 +2209,6 @@ SDL::triplets* SDL::Event::getTriplets() tripletsInCPU->hitIndices = new unsigned int[6 * *(tripletsInCPU->nMemoryLocations)]; tripletsInCPU->logicalLayers = new uint8_t[3 * *(tripletsInCPU->nMemoryLocations)]; #ifdef CUT_VALUE_DEBUG - tripletsInCPU->zOut = new float[4 * *(tripletsInCPU->nMemoryLocations)]; tripletsInCPU->zLo = new float[*(tripletsInCPU->nMemoryLocations)]; tripletsInCPU->zHi = new float[*(tripletsInCPU->nMemoryLocations)]; @@ -2228,7 +2239,6 @@ SDL::triplets* SDL::Event::getTriplets() cudaMemcpyAsync(tripletsInCPU->rtHi, tripletsInGPU->rtHi, * (tripletsInCPU->nMemoryLocations)* sizeof(unsigned int), cudaMemcpyDeviceToHost, stream); cudaMemcpyAsync(tripletsInCPU->kZ, tripletsInGPU->kZ, * (tripletsInCPU->nMemoryLocations) * sizeof(unsigned int), cudaMemcpyDeviceToHost, stream); #endif - cudaMemcpyAsync(tripletsInCPU->hitIndices, tripletsInGPU->hitIndices, 6 * *(tripletsInCPU->nMemoryLocations) * sizeof(unsigned int), cudaMemcpyDeviceToHost, stream); cudaMemcpyAsync(tripletsInCPU->logicalLayers, tripletsInGPU->logicalLayers, 3 * *(tripletsInCPU->nMemoryLocations) * sizeof(uint8_t), cudaMemcpyDeviceToHost, stream); cudaMemcpyAsync(tripletsInCPU->segmentIndices, tripletsInGPU->segmentIndices, 2 * *(tripletsInCPU->nMemoryLocations) * sizeof(unsigned int), cudaMemcpyDeviceToHost,stream); @@ -2257,8 +2267,8 @@ SDL::quintuplets* SDL::Event::getQuintuplets() cudaMemcpyAsync(&nMemoryLocations, quintupletsInGPU->nMemoryLocations, sizeof(unsigned int), cudaMemcpyDeviceToHost, stream); cudaStreamSynchronize(stream); - 
quintupletsInCPU->nQuintuplets = new unsigned int[nLowerModules]; - quintupletsInCPU->totOccupancyQuintuplets = new unsigned int[nLowerModules]; + quintupletsInCPU->nQuintuplets = new int[nLowerModules]; + quintupletsInCPU->totOccupancyQuintuplets = new int[nLowerModules]; quintupletsInCPU->tripletIndices = new unsigned int[2 * nMemoryLocations]; quintupletsInCPU->lowerModuleIndices = new uint16_t[5 * nMemoryLocations]; quintupletsInCPU->innerRadius = new FPX[nMemoryLocations]; @@ -2274,9 +2284,8 @@ SDL::quintuplets* SDL::Event::getQuintuplets() quintupletsInCPU->chiSquared = new float[nMemoryLocations]; quintupletsInCPU->nonAnchorChiSquared = new float[nMemoryLocations]; - - cudaMemcpyAsync(quintupletsInCPU->nQuintuplets, quintupletsInGPU->nQuintuplets, nLowerModules * sizeof(unsigned int), cudaMemcpyDeviceToHost,stream); - cudaMemcpyAsync(quintupletsInCPU->totOccupancyQuintuplets, quintupletsInGPU->totOccupancyQuintuplets, nLowerModules * sizeof(unsigned int), cudaMemcpyDeviceToHost,stream); + cudaMemcpyAsync(quintupletsInCPU->nQuintuplets, quintupletsInGPU->nQuintuplets, nLowerModules * sizeof(int), cudaMemcpyDeviceToHost,stream); + cudaMemcpyAsync(quintupletsInCPU->totOccupancyQuintuplets, quintupletsInGPU->totOccupancyQuintuplets, nLowerModules * sizeof(int), cudaMemcpyDeviceToHost,stream); cudaMemcpyAsync(quintupletsInCPU->tripletIndices, quintupletsInGPU->tripletIndices, 2 * nMemoryLocations * sizeof(unsigned int), cudaMemcpyDeviceToHost,stream); cudaMemcpyAsync(quintupletsInCPU->lowerModuleIndices, quintupletsInGPU->lowerModuleIndices, 5 * nMemoryLocations * sizeof(uint16_t), cudaMemcpyDeviceToHost,stream); cudaMemcpyAsync(quintupletsInCPU->innerRadius, quintupletsInGPU->innerRadius, nMemoryLocations * sizeof(FPX), cudaMemcpyDeviceToHost,stream); @@ -2292,7 +2301,6 @@ SDL::quintuplets* SDL::Event::getQuintuplets() cudaStreamSynchronize(stream); } - return quintupletsInCPU; } @@ -2403,47 +2411,47 @@ SDL::modules* SDL::Event::getFullModules() modulesInCPUFull 
= new SDL::modules; uint16_t nLowerModules; cudaMemcpyAsync(&nLowerModules, modulesInGPU->nLowerModules, sizeof(uint16_t), cudaMemcpyDeviceToHost,stream); -cudaStreamSynchronize(stream); - - modulesInCPUFull->detIds = new unsigned int[nModules]; - modulesInCPUFull->moduleMap = new uint16_t[40*nModules]; - modulesInCPUFull->nConnectedModules = new uint16_t[nModules]; - modulesInCPUFull->drdzs = new float[nModules]; - modulesInCPUFull->slopes = new float[nModules]; - modulesInCPUFull->nModules = new uint16_t[1]; - modulesInCPUFull->nLowerModules = new uint16_t[1]; - modulesInCPUFull->layers = new short[nModules]; - modulesInCPUFull->rings = new short[nModules]; - modulesInCPUFull->modules = new short[nModules]; - modulesInCPUFull->rods = new short[nModules]; - modulesInCPUFull->subdets = new short[nModules]; - modulesInCPUFull->sides = new short[nModules]; - modulesInCPUFull->isInverted = new bool[nModules]; - modulesInCPUFull->isLower = new bool[nModules]; - - - modulesInCPUFull->moduleType = new ModuleType[nModules]; - modulesInCPUFull->moduleLayerType = new ModuleLayerType[nModules]; - cudaMemcpyAsync(modulesInCPUFull->detIds,modulesInGPU->detIds,nModules*sizeof(unsigned int),cudaMemcpyDeviceToHost,stream); - cudaMemcpyAsync(modulesInCPUFull->moduleMap,modulesInGPU->moduleMap,40*nModules*sizeof(unsigned int),cudaMemcpyDeviceToHost,stream); - cudaMemcpyAsync(modulesInCPUFull->nConnectedModules,modulesInGPU->nConnectedModules,nModules*sizeof(unsigned int),cudaMemcpyDeviceToHost,stream); - cudaMemcpyAsync(modulesInCPUFull->drdzs,modulesInGPU->drdzs,sizeof(float)*nModules,cudaMemcpyDeviceToHost,stream); - cudaMemcpyAsync(modulesInCPUFull->slopes,modulesInGPU->slopes,sizeof(float)*nModules,cudaMemcpyDeviceToHost,stream); - cudaMemcpyAsync(modulesInCPUFull->nLowerModules,modulesInGPU->nLowerModules,sizeof(unsigned int),cudaMemcpyDeviceToHost,stream); - cudaMemcpyAsync(modulesInCPUFull->layers,modulesInGPU->layers,nModules*sizeof(short),cudaMemcpyDeviceToHost,stream); - 
cudaMemcpyAsync(modulesInCPUFull->rings,modulesInGPU->rings,sizeof(short)*nModules,cudaMemcpyDeviceToHost,stream); - cudaMemcpyAsync(modulesInCPUFull->modules,modulesInGPU->modules,sizeof(short)*nModules,cudaMemcpyDeviceToHost,stream); - cudaMemcpyAsync(modulesInCPUFull->rods,modulesInGPU->rods,sizeof(short)*nModules,cudaMemcpyDeviceToHost,stream); - cudaMemcpyAsync(modulesInCPUFull->subdets,modulesInGPU->subdets,sizeof(short)*nModules,cudaMemcpyDeviceToHost,stream); - cudaMemcpyAsync(modulesInCPUFull->sides,modulesInGPU->sides,sizeof(short)*nModules,cudaMemcpyDeviceToHost,stream); - cudaMemcpyAsync(modulesInCPUFull->isInverted,modulesInGPU->isInverted,sizeof(bool)*nModules,cudaMemcpyDeviceToHost,stream); - cudaMemcpyAsync(modulesInCPUFull->isLower,modulesInGPU->isLower,sizeof(bool)*nModules,cudaMemcpyDeviceToHost,stream); - cudaMemcpyAsync(modulesInCPUFull->moduleType,modulesInGPU->moduleType,sizeof(ModuleType)*nModules,cudaMemcpyDeviceToHost,stream); - cudaMemcpyAsync(modulesInCPUFull->moduleLayerType,modulesInGPU->moduleLayerType,sizeof(ModuleLayerType)*nModules,cudaMemcpyDeviceToHost,stream); -cudaStreamSynchronize(stream); + cudaStreamSynchronize(stream); + + modulesInCPUFull->detIds = new unsigned int[nModules]; + modulesInCPUFull->moduleMap = new uint16_t[40*nModules]; + modulesInCPUFull->nConnectedModules = new uint16_t[nModules]; + modulesInCPUFull->drdzs = new float[nModules]; + modulesInCPUFull->slopes = new float[nModules]; + modulesInCPUFull->nModules = new uint16_t[1]; + modulesInCPUFull->nLowerModules = new uint16_t[1]; + modulesInCPUFull->layers = new short[nModules]; + modulesInCPUFull->rings = new short[nModules]; + modulesInCPUFull->modules = new short[nModules]; + modulesInCPUFull->rods = new short[nModules]; + modulesInCPUFull->subdets = new short[nModules]; + modulesInCPUFull->sides = new short[nModules]; + modulesInCPUFull->isInverted = new bool[nModules]; + modulesInCPUFull->isLower = new bool[nModules]; + + modulesInCPUFull->moduleType = 
new ModuleType[nModules]; + modulesInCPUFull->moduleLayerType = new ModuleLayerType[nModules]; + cudaMemcpyAsync(modulesInCPUFull->detIds,modulesInGPU->detIds,nModules*sizeof(unsigned int),cudaMemcpyDeviceToHost,stream); + cudaMemcpyAsync(modulesInCPUFull->moduleMap,modulesInGPU->moduleMap,40*nModules*sizeof(unsigned int),cudaMemcpyDeviceToHost,stream); + cudaMemcpyAsync(modulesInCPUFull->nConnectedModules,modulesInGPU->nConnectedModules,nModules*sizeof(unsigned int),cudaMemcpyDeviceToHost,stream); + cudaMemcpyAsync(modulesInCPUFull->drdzs,modulesInGPU->drdzs,sizeof(float)*nModules,cudaMemcpyDeviceToHost,stream); + cudaMemcpyAsync(modulesInCPUFull->slopes,modulesInGPU->slopes,sizeof(float)*nModules,cudaMemcpyDeviceToHost,stream); + cudaMemcpyAsync(modulesInCPUFull->nLowerModules,modulesInGPU->nLowerModules,sizeof(unsigned int),cudaMemcpyDeviceToHost,stream); + cudaMemcpyAsync(modulesInCPUFull->layers,modulesInGPU->layers,nModules*sizeof(short),cudaMemcpyDeviceToHost,stream); + cudaMemcpyAsync(modulesInCPUFull->rings,modulesInGPU->rings,sizeof(short)*nModules,cudaMemcpyDeviceToHost,stream); + cudaMemcpyAsync(modulesInCPUFull->modules,modulesInGPU->modules,sizeof(short)*nModules,cudaMemcpyDeviceToHost,stream); + cudaMemcpyAsync(modulesInCPUFull->rods,modulesInGPU->rods,sizeof(short)*nModules,cudaMemcpyDeviceToHost,stream); + cudaMemcpyAsync(modulesInCPUFull->subdets,modulesInGPU->subdets,sizeof(short)*nModules,cudaMemcpyDeviceToHost,stream); + cudaMemcpyAsync(modulesInCPUFull->sides,modulesInGPU->sides,sizeof(short)*nModules,cudaMemcpyDeviceToHost,stream); + cudaMemcpyAsync(modulesInCPUFull->isInverted,modulesInGPU->isInverted,sizeof(bool)*nModules,cudaMemcpyDeviceToHost,stream); + cudaMemcpyAsync(modulesInCPUFull->isLower,modulesInGPU->isLower,sizeof(bool)*nModules,cudaMemcpyDeviceToHost,stream); + cudaMemcpyAsync(modulesInCPUFull->moduleType,modulesInGPU->moduleType,sizeof(ModuleType)*nModules,cudaMemcpyDeviceToHost,stream); + 
cudaMemcpyAsync(modulesInCPUFull->moduleLayerType,modulesInGPU->moduleLayerType,sizeof(ModuleLayerType)*nModules,cudaMemcpyDeviceToHost,stream); + cudaStreamSynchronize(stream); } return modulesInCPUFull; } + SDL::modules* SDL::Event::getModules() { if(modulesInCPU == nullptr) @@ -2482,5 +2490,4 @@ SDL::modules* SDL::Event::getModules() cudaStreamSynchronize(stream); } return modulesInCPU; -} - +} \ No newline at end of file diff --git a/SDL/PixelTriplet.cuh b/SDL/PixelTriplet.cuh index d54caab9..cfb1c630 100644 --- a/SDL/PixelTriplet.cuh +++ b/SDL/PixelTriplet.cuh @@ -2133,7 +2133,7 @@ namespace SDL if( modulesInGPU.moduleType[quintupletLowerModuleIndex] == SDL::TwoS) continue; uint16_t pixelModuleIndex = *modulesInGPU.nLowerModules; if(segmentsInGPU.isDup[i_pLS]) continue; - unsigned int nOuterQuintuplets = quintupletsInGPU.nQuintuplets[quintupletLowerModuleIndex]; + int nOuterQuintuplets = quintupletsInGPU.nQuintuplets[quintupletLowerModuleIndex]; if(nOuterQuintuplets == 0) continue; diff --git a/SDL/Quintuplet.cu b/SDL/Quintuplet.cu index 0582bd72..0fa2caaa 100644 --- a/SDL/Quintuplet.cu +++ b/SDL/Quintuplet.cu @@ -1,6 +1,4 @@ # include "Quintuplet.cuh" -#include "allocate.h" -#include "Kernels.cuh" SDL::quintuplets::quintuplets() { @@ -33,7 +31,7 @@ void SDL::quintuplets::freeMemoryCache() { int dev; cudaGetDevice(&dev); - cms::cuda::free_device(dev,tripletIndices); + cms::cuda::free_device(dev, tripletIndices); cms::cuda::free_device(dev, lowerModuleIndices); cms::cuda::free_device(dev, nQuintuplets); cms::cuda::free_device(dev, totOccupancyQuintuplets); @@ -79,79 +77,16 @@ void SDL::quintuplets::freeMemory(cudaStream_t stream) cudaFree(nonAnchorChiSquared); cudaStreamSynchronize(stream); } -//TODO:Reuse the track candidate one instead of this! 
-__global__ void SDL::createEligibleModulesListForQuintupletsGPU(struct modules& modulesInGPU,struct triplets& tripletsInGPU, unsigned int* device_nTotalQuintuplets, cudaStream_t stream,struct objectRanges& rangesInGPU) -{ - __shared__ int nEligibleT5Modulesx; - __shared__ unsigned int nTotalQuintupletsx; - nTotalQuintupletsx = 0; //start! - nEligibleT5Modulesx = 0; - __syncthreads(); - - unsigned int occupancy; - unsigned int category_number, eta_number; - unsigned int layers, subdets, rings; - float eta; - //start filling - int gid = blockIdx.x * blockDim.x + threadIdx.x; - int np = gridDim.x * blockDim.x; - for(uint16_t i = gid; i < *modulesInGPU.nLowerModules; i+= np) - { - //condition for a quintuple to exist for a module - //TCs don't exist for layers 5 and 6 barrel, and layers 2,3,4,5 endcap - layers = modulesInGPU.layers[i]; - subdets = modulesInGPU.subdets[i]; - rings = modulesInGPU.rings[i]; - eta = modulesInGPU.eta[i]; - occupancy = 0; - - if (tripletsInGPU.nTriplets[i] == 0) continue; - if (subdets == SDL::Barrel and layers >= 3) continue; - if (subdets == SDL::Endcap and layers > 1) continue; - - int nEligibleT5Modules = atomicAdd(&nEligibleT5Modulesx,1); - if (nEligibleT5Modules < 0) printf("%u\n",nEligibleT5Modules); - if (layers<=3 && subdets==5) category_number = 0; - if (layers>=4 && subdets==5) category_number = 1; - if (layers<=2 && subdets==4 && rings>=11) category_number = 2; - if (layers>=3 && subdets==4 && rings>=8) category_number = 2; - if (layers<=2 && subdets==4 && rings<=10) category_number = 3; - if (layers>=3 && subdets==4 && rings<=7) category_number = 3; - if (abs(eta)<0.75) eta_number=0; - if (abs(eta)>0.75 && abs(eta)<1.5) eta_number=1; - if (abs(eta)>1.5 && abs(eta)<2.25) eta_number=2; - if (abs(eta)>2.25 && abs(eta)<3) eta_number=3; - - if (category_number == 0 && eta_number == 0) occupancy = 336; - if (category_number == 0 && eta_number == 1) occupancy = 414; - if (category_number == 0 && eta_number == 2) occupancy = 231; - if 
(category_number == 0 && eta_number == 3) occupancy = 146; - if (category_number == 3 && eta_number == 1) occupancy = 0; - if (category_number == 3 && eta_number == 2) occupancy = 191; - if (category_number == 3 && eta_number == 3) occupancy = 106; - - unsigned int nTotQ = atomicAdd(&nTotalQuintupletsx,occupancy); - rangesInGPU.quintupletModuleIndices[i] = nTotQ; - rangesInGPU.indicesOfEligibleT5Modules[nEligibleT5Modules] = i; - } - __syncthreads(); - if(threadIdx.x==0){ - *rangesInGPU.nEligibleT5Modules = static_cast(nEligibleT5Modulesx); - *device_nTotalQuintuplets = nTotalQuintupletsx; - } -} void SDL::createQuintupletsInExplicitMemory(struct SDL::quintuplets& quintupletsInGPU, const unsigned int& nTotalQuintuplets, const uint16_t& nLowerModules, const uint16_t& nEligibleModules,cudaStream_t stream) { - //unsigned int nMemoryLocations = nEligibleModules * maxQuintuplets; #ifdef CACHE_ALLOC - // cudaStream_t stream = 0; int dev; cudaGetDevice(&dev); quintupletsInGPU.tripletIndices = (unsigned int*)cms::cuda::allocate_device(dev, 2 * nTotalQuintuplets * sizeof(unsigned int), stream); quintupletsInGPU.lowerModuleIndices = (uint16_t*)cms::cuda::allocate_device(dev, 5 * nTotalQuintuplets * sizeof(uint16_t), stream); - quintupletsInGPU.nQuintuplets = (unsigned int*)cms::cuda::allocate_device(dev, nLowerModules * sizeof(unsigned int), stream); - quintupletsInGPU.totOccupancyQuintuplets = (unsigned int*)cms::cuda::allocate_device(dev, nLowerModules * sizeof(unsigned int), stream); + quintupletsInGPU.nQuintuplets = (int*)cms::cuda::allocate_device(dev, nLowerModules * sizeof(int), stream); + quintupletsInGPU.totOccupancyQuintuplets = (int*)cms::cuda::allocate_device(dev, nLowerModules * sizeof(int), stream); quintupletsInGPU.innerRadius = (FPX*)cms::cuda::allocate_device(dev, nTotalQuintuplets * sizeof(FPX), stream); quintupletsInGPU.outerRadius = (FPX*)cms::cuda::allocate_device(dev, nTotalQuintuplets * sizeof(FPX), stream); quintupletsInGPU.bridgeRadius = 
(FPX*)cms::cuda::allocate_device(dev, nTotalQuintuplets * sizeof(float), stream); @@ -173,8 +108,8 @@ void SDL::createQuintupletsInExplicitMemory(struct SDL::quintuplets& quintuplets #else cudaMalloc(&quintupletsInGPU.tripletIndices, 2 * nTotalQuintuplets * sizeof(unsigned int)); cudaMalloc(&quintupletsInGPU.lowerModuleIndices, 5 * nTotalQuintuplets * sizeof(uint16_t)); - cudaMalloc(&quintupletsInGPU.nQuintuplets, nLowerModules * sizeof(unsigned int)); - cudaMalloc(&quintupletsInGPU.totOccupancyQuintuplets, nLowerModules * sizeof(unsigned int)); + cudaMalloc(&quintupletsInGPU.nQuintuplets, nLowerModules * sizeof(int)); + cudaMalloc(&quintupletsInGPU.totOccupancyQuintuplets, nLowerModules * sizeof(int)); cudaMalloc(&quintupletsInGPU.innerRadius, nTotalQuintuplets * sizeof(FPX)); cudaMalloc(&quintupletsInGPU.outerRadius, nTotalQuintuplets * sizeof(FPX)); cudaMalloc(&quintupletsInGPU.pt, nTotalQuintuplets *4* sizeof(FPX)); @@ -193,1875 +128,12 @@ void SDL::createQuintupletsInExplicitMemory(struct SDL::quintuplets& quintuplets cudaMalloc(&quintupletsInGPU.nonAnchorChiSquared, nTotalQuintuplets * sizeof(float)); cudaMalloc(&quintupletsInGPU.nMemoryLocations, sizeof(unsigned int)); #endif - cudaMemsetAsync(quintupletsInGPU.nQuintuplets,0,nLowerModules * sizeof(unsigned int),stream); - cudaMemsetAsync(quintupletsInGPU.totOccupancyQuintuplets,0,nLowerModules * sizeof(unsigned int),stream); + cudaMemsetAsync(quintupletsInGPU.nQuintuplets,0,nLowerModules * sizeof(int),stream); + cudaMemsetAsync(quintupletsInGPU.totOccupancyQuintuplets,0,nLowerModules * sizeof(int),stream); cudaMemsetAsync(quintupletsInGPU.isDup,0,nTotalQuintuplets * sizeof(bool),stream); cudaMemsetAsync(quintupletsInGPU.partOfPT5,0,nTotalQuintuplets * sizeof(bool),stream); cudaStreamSynchronize(stream); quintupletsInGPU.eta = quintupletsInGPU.pt + nTotalQuintuplets; quintupletsInGPU.phi = quintupletsInGPU.pt + 2*nTotalQuintuplets; quintupletsInGPU.score_rphisum = quintupletsInGPU.pt + 3*nTotalQuintuplets; -} 
- - -ALPAKA_FN_ACC void SDL::addQuintupletToMemory(struct SDL::triplets& tripletsInGPU, struct SDL::quintuplets& quintupletsInGPU, unsigned int innerTripletIndex, unsigned int outerTripletIndex, uint16_t& lowerModule1, uint16_t& lowerModule2, uint16_t& lowerModule3, uint16_t& lowerModule4, uint16_t& lowerModule5, float& innerRadius, float& bridgeRadius, float& outerRadius, float& regressionG, float& regressionF, float& regressionRadius, float& rzChiSquared, float& rPhiChiSquared, float& - nonAnchorChiSquared, float pt, float eta, float phi, float scores, uint8_t layer, unsigned int quintupletIndex) - -{ - quintupletsInGPU.tripletIndices[2 * quintupletIndex] = innerTripletIndex; - quintupletsInGPU.tripletIndices[2 * quintupletIndex + 1] = outerTripletIndex; - - quintupletsInGPU.lowerModuleIndices[5 * quintupletIndex] = lowerModule1; - quintupletsInGPU.lowerModuleIndices[5 * quintupletIndex + 1] = lowerModule2; - quintupletsInGPU.lowerModuleIndices[5 * quintupletIndex + 2] = lowerModule3; - quintupletsInGPU.lowerModuleIndices[5 * quintupletIndex + 3] = lowerModule4; - quintupletsInGPU.lowerModuleIndices[5 * quintupletIndex + 4] = lowerModule5; - quintupletsInGPU.innerRadius[quintupletIndex] = __F2H(innerRadius); - quintupletsInGPU.outerRadius[quintupletIndex] = __F2H(outerRadius); - quintupletsInGPU.pt[quintupletIndex] = __F2H(pt); - quintupletsInGPU.eta[quintupletIndex] = __F2H(eta); - quintupletsInGPU.phi[quintupletIndex] = __F2H(phi); - quintupletsInGPU.score_rphisum[quintupletIndex] = __F2H(scores); - quintupletsInGPU.layer[quintupletIndex] = layer; - quintupletsInGPU.isDup[quintupletIndex] = false; - quintupletsInGPU.regressionRadius[quintupletIndex] = regressionRadius; - quintupletsInGPU.regressionG[quintupletIndex] = regressionG; - quintupletsInGPU.regressionF[quintupletIndex] = regressionF; - quintupletsInGPU.logicalLayers[5 * quintupletIndex] = tripletsInGPU.logicalLayers[3 * innerTripletIndex]; - quintupletsInGPU.logicalLayers[5 * quintupletIndex + 1] = 
tripletsInGPU.logicalLayers[3 * innerTripletIndex + 1]; - quintupletsInGPU.logicalLayers[5 * quintupletIndex + 2] = tripletsInGPU.logicalLayers[3 * innerTripletIndex + 2]; - quintupletsInGPU.logicalLayers[5 * quintupletIndex + 3] = tripletsInGPU.logicalLayers[3 * outerTripletIndex + 1]; - quintupletsInGPU.logicalLayers[5 * quintupletIndex + 4] = tripletsInGPU.logicalLayers[3 * outerTripletIndex + 2]; - - quintupletsInGPU.hitIndices[10 * quintupletIndex] = tripletsInGPU.hitIndices[6 * innerTripletIndex]; - quintupletsInGPU.hitIndices[10 * quintupletIndex + 1] = tripletsInGPU.hitIndices[6 * innerTripletIndex + 1]; - quintupletsInGPU.hitIndices[10 * quintupletIndex + 2] = tripletsInGPU.hitIndices[6 * innerTripletIndex + 2]; - quintupletsInGPU.hitIndices[10 * quintupletIndex + 3] = tripletsInGPU.hitIndices[6 * innerTripletIndex + 3]; - quintupletsInGPU.hitIndices[10 * quintupletIndex + 4] = tripletsInGPU.hitIndices[6 * innerTripletIndex + 4]; - quintupletsInGPU.hitIndices[10 * quintupletIndex + 5] = tripletsInGPU.hitIndices[6 * innerTripletIndex + 5]; - quintupletsInGPU.hitIndices[10 * quintupletIndex + 6] = tripletsInGPU.hitIndices[6 * outerTripletIndex + 2]; - quintupletsInGPU.hitIndices[10 * quintupletIndex + 7] = tripletsInGPU.hitIndices[6 * outerTripletIndex + 3]; - quintupletsInGPU.hitIndices[10 * quintupletIndex + 8] = tripletsInGPU.hitIndices[6 * outerTripletIndex + 4]; - quintupletsInGPU.hitIndices[10 * quintupletIndex + 9] = tripletsInGPU.hitIndices[6 * outerTripletIndex + 5]; - quintupletsInGPU.bridgeRadius[quintupletIndex] = bridgeRadius; - quintupletsInGPU.rzChiSquared[quintupletIndex] = rzChiSquared; - quintupletsInGPU.chiSquared[quintupletIndex] = rPhiChiSquared; - quintupletsInGPU.nonAnchorChiSquared[quintupletIndex] = nonAnchorChiSquared; - -} - -ALPAKA_FN_ACC bool SDL::runQuintupletDefaultAlgo(struct SDL::modules& modulesInGPU, struct SDL::miniDoublets& mdsInGPU, struct SDL::segments& segmentsInGPU, struct SDL::triplets& tripletsInGPU, uint16_t& 
lowerModuleIndex1, uint16_t& lowerModuleIndex2, uint16_t& lowerModuleIndex3, uint16_t& lowerModuleIndex4, uint16_t& lowerModuleIndex5, unsigned int& innerTripletIndex, unsigned int& outerTripletIndex, float& innerRadius, float& outerRadius, float& bridgeRadius, float& regressionG, float& regressionF, float& regressionRadius, float& rzChiSquared, float& chiSquared, float& nonAnchorChiSquared) -{ - bool pass = true; - unsigned int firstSegmentIndex = tripletsInGPU.segmentIndices[2 * innerTripletIndex]; - unsigned int secondSegmentIndex = tripletsInGPU.segmentIndices[2 * innerTripletIndex + 1]; - unsigned int thirdSegmentIndex = tripletsInGPU.segmentIndices[2 * outerTripletIndex]; - unsigned int fourthSegmentIndex = tripletsInGPU.segmentIndices[2 * outerTripletIndex + 1]; - - unsigned int innerOuterOuterMiniDoubletIndex = segmentsInGPU.mdIndices[2 * secondSegmentIndex + 1]; //inner triplet outer segment outer MD index - unsigned int outerInnerInnerMiniDoubletIndex = segmentsInGPU.mdIndices[2 * thirdSegmentIndex]; //outer triplet inner segmnet inner MD index - - //this cut reduces the number of candidates by a factor of 3, i.e., 2 out of 3 warps can end right here! 
- if (innerOuterOuterMiniDoubletIndex != outerInnerInnerMiniDoubletIndex) return false; - - //apply T4 criteria between segments 1 and 3 - float zOut, rtOut, deltaPhiPos, deltaPhi, betaIn, betaOut, pt_beta; //temp stuff - float zLo, zHi, rtLo, rtHi, zLoPointed, zHiPointed, sdlCut, betaInCut, betaOutCut, deltaBetaCut, kZ; - - unsigned int firstMDIndex = segmentsInGPU.mdIndices[2 * firstSegmentIndex]; - unsigned int secondMDIndex = segmentsInGPU.mdIndices[2 * secondSegmentIndex]; - unsigned int thirdMDIndex = segmentsInGPU.mdIndices[2 * secondSegmentIndex + 1]; - unsigned int fourthMDIndex = segmentsInGPU.mdIndices[2 * thirdSegmentIndex + 1]; - unsigned int fifthMDIndex = segmentsInGPU.mdIndices[2 * fourthSegmentIndex + 1]; - - pass = pass and runQuintupletDefaultAlgo(modulesInGPU, mdsInGPU, segmentsInGPU, lowerModuleIndex1, lowerModuleIndex2, lowerModuleIndex3, lowerModuleIndex4, firstSegmentIndex, thirdSegmentIndex, firstMDIndex, secondMDIndex, thirdMDIndex, fourthMDIndex, zOut, rtOut, deltaPhiPos, deltaPhi, betaIn, betaOut, pt_beta, zLo, zHi, rtLo, rtHi, zLoPointed, zHiPointed, sdlCut, betaInCut, betaOutCut, deltaBetaCut, kZ); - if(not pass) return pass; - - pass = pass and runQuintupletDefaultAlgo(modulesInGPU, mdsInGPU, segmentsInGPU, lowerModuleIndex1, lowerModuleIndex2, lowerModuleIndex4, lowerModuleIndex5, firstSegmentIndex, fourthSegmentIndex, firstMDIndex, secondMDIndex, fourthMDIndex, fifthMDIndex, zOut, rtOut, deltaPhiPos, deltaPhi, betaIn, betaOut, pt_beta, zLo, zHi, rtLo, rtHi, zLoPointed, zHiPointed, sdlCut, betaInCut, betaOutCut, deltaBetaCut, kZ); - if(not pass) return pass; - - pass = pass and passT5RZConstraint(modulesInGPU, mdsInGPU, firstMDIndex, secondMDIndex, thirdMDIndex, fourthMDIndex, fifthMDIndex, lowerModuleIndex1, lowerModuleIndex2, lowerModuleIndex3, lowerModuleIndex4, lowerModuleIndex5); - if(not pass) return pass; - - float x1 = mdsInGPU.anchorX[firstMDIndex]; - float x2 = mdsInGPU.anchorX[secondMDIndex]; - float x3 = 
mdsInGPU.anchorX[thirdMDIndex]; - float x4 = mdsInGPU.anchorX[fourthMDIndex]; - float x5 = mdsInGPU.anchorX[fifthMDIndex]; - - float y1 = mdsInGPU.anchorY[firstMDIndex]; - float y2 = mdsInGPU.anchorY[secondMDIndex]; - float y3 = mdsInGPU.anchorY[thirdMDIndex]; - float y4 = mdsInGPU.anchorY[fourthMDIndex]; - float y5 = mdsInGPU.anchorY[fifthMDIndex]; - - //construct the arrays - float x1Vec[] = {x1, x1, x1}; - float y1Vec[] = {y1, y1, y1}; - float x2Vec[] = {x2, x2, x2}; - float y2Vec[] = {y2, y2, y2}; - float x3Vec[] = {x3, x3, x3}; - float y3Vec[] = {y3, y3, y3}; - - if(modulesInGPU.subdets[lowerModuleIndex1] == SDL::Endcap and modulesInGPU.moduleType[lowerModuleIndex1] == SDL::TwoS) - { - x1Vec[1] = mdsInGPU.anchorLowEdgeX[firstMDIndex]; - x1Vec[2] = mdsInGPU.anchorHighEdgeX[firstMDIndex]; - - y1Vec[1] = mdsInGPU.anchorLowEdgeY[firstMDIndex]; - y1Vec[2] = mdsInGPU.anchorHighEdgeY[firstMDIndex]; - } - if(modulesInGPU.subdets[lowerModuleIndex2] == SDL::Endcap and modulesInGPU.moduleType[lowerModuleIndex2] == SDL::TwoS) - { - x2Vec[1] = mdsInGPU.anchorLowEdgeX[secondMDIndex]; - x2Vec[2] = mdsInGPU.anchorHighEdgeX[secondMDIndex]; - - y2Vec[1] = mdsInGPU.anchorLowEdgeY[secondMDIndex]; - y2Vec[2] = mdsInGPU.anchorHighEdgeY[secondMDIndex]; - } - if(modulesInGPU.subdets[lowerModuleIndex3] == SDL::Endcap and modulesInGPU.moduleType[lowerModuleIndex3] == SDL::TwoS) - { - x3Vec[1] = mdsInGPU.anchorLowEdgeX[thirdMDIndex]; - x3Vec[2] = mdsInGPU.anchorHighEdgeX[thirdMDIndex]; - - y3Vec[1] = mdsInGPU.anchorLowEdgeY[thirdMDIndex]; - y3Vec[2] = mdsInGPU.anchorHighEdgeY[thirdMDIndex]; - } - - float innerRadiusMin2S, innerRadiusMax2S; - computeErrorInRadius(x1Vec, y1Vec, x2Vec, y2Vec, x3Vec, y3Vec, innerRadiusMin2S, innerRadiusMax2S); - - for (int i=0; i<3; i++) - { - x1Vec[i] = x4; - y1Vec[i] = y4; - } - if(modulesInGPU.subdets[lowerModuleIndex4] == SDL::Endcap and modulesInGPU.moduleType[lowerModuleIndex4] == SDL::TwoS) - { - x1Vec[1] = mdsInGPU.anchorLowEdgeX[fourthMDIndex]; - 
x1Vec[2] = mdsInGPU.anchorHighEdgeX[fourthMDIndex]; - - y1Vec[1] = mdsInGPU.anchorLowEdgeY[fourthMDIndex]; - y1Vec[2] = mdsInGPU.anchorHighEdgeY[fourthMDIndex]; - } - - float bridgeRadiusMin2S, bridgeRadiusMax2S; - computeErrorInRadius(x2Vec, y2Vec, x3Vec, y3Vec, x1Vec, y1Vec, bridgeRadiusMin2S, bridgeRadiusMax2S); - - for(int i=0; i<3; i++) - { - x2Vec[i] = x5; - y2Vec[i] = y5; - } - if(modulesInGPU.subdets[lowerModuleIndex5] == SDL::Endcap and modulesInGPU.moduleType[lowerModuleIndex5] == SDL::TwoS) - { - x2Vec[1] = mdsInGPU.anchorLowEdgeX[fifthMDIndex]; - x2Vec[2] = mdsInGPU.anchorHighEdgeX[fifthMDIndex]; - - y2Vec[1] = mdsInGPU.anchorLowEdgeY[fifthMDIndex]; - y2Vec[2] = mdsInGPU.anchorHighEdgeY[fifthMDIndex]; - } - - float outerRadiusMin2S, outerRadiusMax2S; - computeErrorInRadius(x3Vec, y3Vec, x1Vec, y1Vec, x2Vec, y2Vec, outerRadiusMin2S, outerRadiusMax2S); - - float g, f; - innerRadius = computeRadiusFromThreeAnchorHits(x1, y1, x2, y2, x3, y3, g, f); - outerRadius = computeRadiusFromThreeAnchorHits(x3, y3, x4, y4, x5, y5, g, f); - bridgeRadius = computeRadiusFromThreeAnchorHits(x2, y2, x3, y3, x4, y4, g, f); - - - pass = pass & (innerRadius >= 0.95f * ptCut/(2.f * k2Rinv1GeVf)); - - float innerInvRadiusMin, innerInvRadiusMax, bridgeInvRadiusMin, bridgeInvRadiusMax, outerInvRadiusMin, outerInvRadiusMax; - - //split by category - bool tempPass; - if(modulesInGPU.subdets[lowerModuleIndex1] == SDL::Barrel and modulesInGPU.subdets[lowerModuleIndex2] == SDL::Barrel and modulesInGPU.subdets[lowerModuleIndex3] == SDL::Barrel and modulesInGPU.subdets[lowerModuleIndex4] == SDL::Barrel and modulesInGPU.subdets[lowerModuleIndex5] == SDL::Barrel) - { - tempPass = matchRadiiBBBBB(innerRadius, bridgeRadius, outerRadius, innerInvRadiusMin, innerInvRadiusMax, bridgeInvRadiusMin, bridgeInvRadiusMax, outerInvRadiusMin, outerInvRadiusMax); - } - else if(modulesInGPU.subdets[lowerModuleIndex1] == SDL::Barrel and modulesInGPU.subdets[lowerModuleIndex2] == SDL::Barrel and 
modulesInGPU.subdets[lowerModuleIndex3] == SDL::Barrel and modulesInGPU.subdets[lowerModuleIndex4] == SDL::Barrel and modulesInGPU.subdets[lowerModuleIndex5] == SDL::Endcap) - { - tempPass = matchRadiiBBBBE(innerRadius, bridgeRadius, outerRadius, innerRadiusMin2S, innerRadiusMax2S, bridgeRadiusMin2S, bridgeRadiusMax2S, outerRadiusMin2S, outerRadiusMax2S, innerInvRadiusMin, innerInvRadiusMax, bridgeInvRadiusMin, bridgeInvRadiusMax, outerInvRadiusMin, outerInvRadiusMax); - } - else if(modulesInGPU.subdets[lowerModuleIndex1] == SDL::Barrel and modulesInGPU.subdets[lowerModuleIndex2] == SDL::Barrel and modulesInGPU.subdets[lowerModuleIndex3] == SDL::Barrel and modulesInGPU.subdets[lowerModuleIndex4] == SDL::Endcap and modulesInGPU.subdets[lowerModuleIndex5] == SDL::Endcap) - { - if(modulesInGPU.layers[lowerModuleIndex1] == 1) - { - tempPass = matchRadiiBBBEE12378(innerRadius, bridgeRadius, outerRadius,innerRadiusMin2S, innerRadiusMax2S, bridgeRadiusMin2S, bridgeRadiusMax2S, outerRadiusMin2S, outerRadiusMax2S, innerInvRadiusMin, innerInvRadiusMax, bridgeInvRadiusMin, bridgeInvRadiusMax, outerInvRadiusMin, outerInvRadiusMax); - } - else if(modulesInGPU.layers[lowerModuleIndex1] == 2) - { - tempPass = matchRadiiBBBEE23478(innerRadius, bridgeRadius, outerRadius,innerRadiusMin2S, innerRadiusMax2S, bridgeRadiusMin2S, bridgeRadiusMax2S, outerRadiusMin2S, outerRadiusMax2S, innerInvRadiusMin, innerInvRadiusMax, bridgeInvRadiusMin, bridgeInvRadiusMax, outerInvRadiusMin, outerInvRadiusMax); - } - else - { - tempPass = matchRadiiBBBEE34578(innerRadius, bridgeRadius, outerRadius,innerRadiusMin2S, innerRadiusMax2S, bridgeRadiusMin2S, bridgeRadiusMax2S, outerRadiusMin2S, outerRadiusMax2S, innerInvRadiusMin, innerInvRadiusMax, bridgeInvRadiusMin, bridgeInvRadiusMax, outerInvRadiusMin, outerInvRadiusMax); - } - } - - else if(modulesInGPU.subdets[lowerModuleIndex1] == SDL::Barrel and modulesInGPU.subdets[lowerModuleIndex2] == SDL::Barrel and modulesInGPU.subdets[lowerModuleIndex3] == 
SDL::Endcap and modulesInGPU.subdets[lowerModuleIndex4] == SDL::Endcap and modulesInGPU.subdets[lowerModuleIndex5] == SDL::Endcap) - { - tempPass = matchRadiiBBEEE(innerRadius, bridgeRadius, outerRadius, innerRadiusMin2S, innerRadiusMax2S, bridgeRadiusMin2S, bridgeRadiusMax2S, outerRadiusMin2S, outerRadiusMax2S, innerInvRadiusMin, innerInvRadiusMax, bridgeInvRadiusMin, bridgeInvRadiusMax, outerInvRadiusMin, outerInvRadiusMax); - } - else if(modulesInGPU.subdets[lowerModuleIndex1] == SDL::Barrel and modulesInGPU.subdets[lowerModuleIndex2] == SDL::Endcap and modulesInGPU.subdets[lowerModuleIndex3] == SDL::Endcap and modulesInGPU.subdets[lowerModuleIndex4] == SDL::Endcap and modulesInGPU.subdets[lowerModuleIndex5] == SDL::Endcap) - { - tempPass = matchRadiiBEEEE(innerRadius, bridgeRadius, outerRadius, innerRadiusMin2S, innerRadiusMax2S, bridgeRadiusMin2S, bridgeRadiusMax2S, outerRadiusMin2S, outerRadiusMax2S, innerInvRadiusMin, innerInvRadiusMax, bridgeInvRadiusMin, bridgeInvRadiusMax, outerInvRadiusMin, outerInvRadiusMax); - } - else - { - tempPass = matchRadiiEEEEE(innerRadius, bridgeRadius, outerRadius, innerRadiusMin2S, innerRadiusMax2S, bridgeRadiusMin2S, bridgeRadiusMax2S, outerRadiusMin2S, outerRadiusMax2S,innerInvRadiusMin, innerInvRadiusMax, bridgeInvRadiusMin, bridgeInvRadiusMax, outerInvRadiusMin, outerInvRadiusMax); - } - - //compute regression radius right here - this computation is expensive!!! 
- pass = pass and tempPass; - if(not pass) return pass; - - float xVec[] = {x1, x2, x3, x4, x5}; - float yVec[] = {y1, y2, y3, y4, y5}; - float sigmas[5], delta1[5], delta2[5], slopes[5]; - bool isFlat[5]; - //5 categories for sigmas - const uint16_t lowerModuleIndices[] = {lowerModuleIndex1, lowerModuleIndex2, lowerModuleIndex3, lowerModuleIndex4, lowerModuleIndex5}; - - computeSigmasForRegression(modulesInGPU, lowerModuleIndices, delta1, delta2, slopes, isFlat); - regressionRadius = computeRadiusUsingRegression(5,xVec, yVec, delta1, delta2, slopes, isFlat, regressionG, regressionF, sigmas, chiSquared); - - //extra chi squared cuts! - if(regressionRadius < 5.0f/(2.f * k2Rinv1GeVf)) - { - pass = pass and passChiSquaredConstraint(modulesInGPU, lowerModuleIndex1, lowerModuleIndex2, lowerModuleIndex3, lowerModuleIndex4, lowerModuleIndex5, chiSquared); - if(not pass) return pass; - } - - //compute the other chisquared - //non anchor is always shifted for tilted and endcap! - float nonAnchorDelta1[5], nonAnchorDelta2[5], nonAnchorSlopes[5]; - float nonAnchorxs[] = { mdsInGPU.outerX[firstMDIndex], mdsInGPU.outerX[secondMDIndex], mdsInGPU.outerX[thirdMDIndex], mdsInGPU.outerX[fourthMDIndex], mdsInGPU.outerX[fifthMDIndex]}; - float nonAnchorys[] = { mdsInGPU.outerY[firstMDIndex], mdsInGPU.outerY[secondMDIndex], mdsInGPU.outerY[thirdMDIndex], mdsInGPU.outerY[fourthMDIndex], mdsInGPU.outerY[fifthMDIndex]}; - - computeSigmasForRegression(modulesInGPU, lowerModuleIndices, nonAnchorDelta1, nonAnchorDelta2, nonAnchorSlopes, isFlat, 5, false); - nonAnchorChiSquared = computeChiSquared(5, nonAnchorxs, nonAnchorys, nonAnchorDelta1, nonAnchorDelta2, nonAnchorSlopes, isFlat, regressionG, regressionF, regressionRadius); - return pass; -} - -//90% constraint -ALPAKA_FN_ACC bool SDL::passChiSquaredConstraint(struct SDL::modules& modulesInGPU, uint16_t& lowerModuleIndex1, uint16_t& lowerModuleIndex2, uint16_t& lowerModuleIndex3, uint16_t& lowerModuleIndex4, uint16_t& lowerModuleIndex5, 
float& chiSquared) -{ - //following Philip's layer number prescription - const int layer1 = modulesInGPU.layers[lowerModuleIndex1] + 6 * (modulesInGPU.subdets[lowerModuleIndex1] == SDL::Endcap) + 5 * (modulesInGPU.subdets[lowerModuleIndex1] == SDL::Endcap and modulesInGPU.moduleType[lowerModuleIndex1] == SDL::TwoS); - const int layer2 = modulesInGPU.layers[lowerModuleIndex2] + 6 * (modulesInGPU.subdets[lowerModuleIndex2] == SDL::Endcap) + 5 * (modulesInGPU.subdets[lowerModuleIndex2] == SDL::Endcap and modulesInGPU.moduleType[lowerModuleIndex2] == SDL::TwoS); - const int layer3 = modulesInGPU.layers[lowerModuleIndex3] + 6 * (modulesInGPU.subdets[lowerModuleIndex3] == SDL::Endcap) + 5 * (modulesInGPU.subdets[lowerModuleIndex3] == SDL::Endcap and modulesInGPU.moduleType[lowerModuleIndex3] == SDL::TwoS); - const int layer4 = modulesInGPU.layers[lowerModuleIndex4] + 6 * (modulesInGPU.subdets[lowerModuleIndex4] == SDL::Endcap) + 5 * (modulesInGPU.subdets[lowerModuleIndex4] == SDL::Endcap and modulesInGPU.moduleType[lowerModuleIndex4] == SDL::TwoS); - const int layer5 = modulesInGPU.layers[lowerModuleIndex5] + 6 * (modulesInGPU.subdets[lowerModuleIndex5] == SDL::Endcap) + 5 * (modulesInGPU.subdets[lowerModuleIndex5] == SDL::Endcap and modulesInGPU.moduleType[lowerModuleIndex5] == SDL::TwoS); - - if(layer1 == 7 and layer2 == 8 and layer3 == 9) - { - if(layer4 == 10 and layer5 == 11) - { - return chiSquared < 0.01788f; - } - else if(layer4 == 10 and layer5 == 16) - { - return chiSquared < 0.04725f; - } - else if(layer4 == 15 and layer5 == 16) - { - return chiSquared < 0.04725f; - } - } - else if(layer1 == 1 and layer2 == 7 and layer3 == 8) - { - if(layer4 == 9 and layer5 == 10) - { - return chiSquared < 0.01788f; - } - else if(layer4 == 9 and layer5 == 15) - { - return chiSquared < 0.08234f; - } - } - else if(layer1 == 1 and layer2 == 2 and layer3 == 7) - { - if(layer4 == 8 and layer5 == 9) - { - return chiSquared < 0.02360f; - } - else if(layer4 == 8 and layer5 == 14) - { 
- return chiSquared < 0.07167f; - } - else if(layer4 == 13 and layer5 == 14) - { - return chiSquared < 0.08234f; - } - } - else if(layer1 == 1 and layer2 == 2 and layer3 == 3) - { - if(layer4 == 7 and layer5 == 8) - { - return chiSquared < 0.01026f; - } - else if(layer4 == 7 and layer5 == 13) - { - return chiSquared < 0.06238f; - } - else if(layer4 == 12 and layer5 == 13) - { - return chiSquared < 0.06238f; - } - } - else if(layer1 == 1 and layer2 == 2 and layer3 == 3 and layer4 == 4) - { - if(layer5 == 12) - { - return chiSquared < 0.09461f; - } - else if(layer5 == 5) - { - return chiSquared < 0.04725f; - } - } - else if(layer1 == 2 and layer2 == 7 and layer3 == 8) - { - if(layer4 == 9 and layer5 == 10) - { - return chiSquared < 0.00512f; - } - if(layer4 == 9 and layer5 == 15) - { - return chiSquared < 0.04112f; - } - else if(layer4 == 14 and layer5 == 15) - { - return chiSquared < 0.06238f; - } - } - else if(layer1 == 2 and layer2 == 3 and layer3 == 7) - { - if(layer4 == 8 and layer5 == 14) - { - return chiSquared < 0.07167f; - } - else if(layer4 == 13 and layer5 == 14) - { - return chiSquared < 0.06238f; - } - } - else if(layer1 == 2 and layer2 == 3 and layer3 == 4) - { - if(layer4 == 12 and layer5 == 13) - { - return chiSquared < 0.10870f; - } - else if(layer4 == 5 and layer5 == 12) - { - return chiSquared < 0.10870f; - } - else if(layer4 == 5 and layer5 == 6) - { - return chiSquared < 0.08234f; - } - } - else if(layer1 == 3 and layer2 == 7 and layer3 == 8 and layer4 == 14 and layer5 == 15) - { - return chiSquared < 0.09461f; - } - else if(layer1 == 3 and layer2 == 4 and layer3 == 5 and layer4 == 12 and layer5 == 13) - { - return chiSquared < 0.09461f; - } - - return true; -} - -//bounds can be found at http://uaf-10.t2.ucsd.edu/~bsathian/SDL/T5_RZFix/t5_rz_thresholds.txt -ALPAKA_FN_ACC bool SDL::passT5RZConstraint(struct SDL::modules& modulesInGPU, struct SDL::miniDoublets& mdsInGPU, unsigned int firstMDIndex, unsigned int secondMDIndex, unsigned int 
thirdMDIndex, unsigned int fourthMDIndex, unsigned int fifthMDIndex, uint16_t& lowerModuleIndex1, uint16_t& lowerModuleIndex2, uint16_t& lowerModuleIndex3, uint16_t& lowerModuleIndex4, uint16_t& lowerModuleIndex5) -{ - const float& rt1 = mdsInGPU.anchorRt[firstMDIndex]; - const float& rt2 = mdsInGPU.anchorRt[secondMDIndex]; - const float& rt3 = mdsInGPU.anchorRt[thirdMDIndex]; - const float& rt4 = mdsInGPU.anchorRt[fourthMDIndex]; - const float& rt5 = mdsInGPU.anchorRt[fifthMDIndex]; - - const float& z1 = mdsInGPU.anchorZ[firstMDIndex]; - const float& z2 = mdsInGPU.anchorZ[secondMDIndex]; - const float& z3 = mdsInGPU.anchorZ[thirdMDIndex]; - const float& z4 = mdsInGPU.anchorZ[fourthMDIndex]; - const float& z5 = mdsInGPU.anchorZ[fifthMDIndex]; - - //following Philip's layer number prescription - const int layer1 = modulesInGPU.layers[lowerModuleIndex1] + 6 * (modulesInGPU.subdets[lowerModuleIndex1] == SDL::Endcap) + 5 * (modulesInGPU.subdets[lowerModuleIndex1] == SDL::Endcap and modulesInGPU.moduleType[lowerModuleIndex1] == SDL::TwoS); - const int layer2 = modulesInGPU.layers[lowerModuleIndex2] + 6 * (modulesInGPU.subdets[lowerModuleIndex2] == SDL::Endcap) + 5 * (modulesInGPU.subdets[lowerModuleIndex2] == SDL::Endcap and modulesInGPU.moduleType[lowerModuleIndex2] == SDL::TwoS); - const int layer3 = modulesInGPU.layers[lowerModuleIndex3] + 6 * (modulesInGPU.subdets[lowerModuleIndex3] == SDL::Endcap) + 5 * (modulesInGPU.subdets[lowerModuleIndex3] == SDL::Endcap and modulesInGPU.moduleType[lowerModuleIndex3] == SDL::TwoS); - const int layer4 = modulesInGPU.layers[lowerModuleIndex4] + 6 * (modulesInGPU.subdets[lowerModuleIndex4] == SDL::Endcap) + 5 * (modulesInGPU.subdets[lowerModuleIndex4] == SDL::Endcap and modulesInGPU.moduleType[lowerModuleIndex4] == SDL::TwoS); - const int layer5 = modulesInGPU.layers[lowerModuleIndex5] + 6 * (modulesInGPU.subdets[lowerModuleIndex5] == SDL::Endcap) + 5 * (modulesInGPU.subdets[lowerModuleIndex5] == SDL::Endcap and 
modulesInGPU.moduleType[lowerModuleIndex5] == SDL::TwoS); - - //slope computed using the internal T3s - const int moduleLayer1 = modulesInGPU.moduleType[lowerModuleIndex1]; - const int moduleLayer2 = modulesInGPU.moduleType[lowerModuleIndex2]; - const int moduleLayer3 = modulesInGPU.moduleType[lowerModuleIndex3]; - const int moduleLayer4 = modulesInGPU.moduleType[lowerModuleIndex4]; - const int moduleLayer5 = modulesInGPU.moduleType[lowerModuleIndex5]; - - float slope; - if(moduleLayer1 == 0 and moduleLayer2 == 0 and moduleLayer3 == 1) //PSPS2S - { - slope = (z2 -z1)/(rt2 - rt1); - } - else - { - slope = (z3 - z1)/(rt3 - rt1); - } - float residual4 = (layer4 <= 6)? ((z4 - z1) - slope * (rt4 - rt1)) : ((rt4 - rt1) - (z4 - z1)/slope); - float residual5 = (layer4 <= 6) ? ((z5 - z1) - slope * (rt5 - rt1)) : ((rt5 - rt1) - (z5 - z1)/slope); - - // creating a chi squared type quantity - // 0-> PS, 1->2S - residual4 = (moduleLayer4 == 0) ? residual4/2.4f : residual4/5.0f; - residual5 = (moduleLayer5 == 0) ? residual5/2.4f : residual5/5.0f; - - const float RMSE = sqrtf(0.5 * (residual4 * residual4 + residual5 * residual5)); - - //categories! 
- if(layer1 == 1 and layer2 == 2 and layer3 == 3) - { - if(layer4 == 4 and layer5 == 5) - { - return RMSE < 0.545f; - } - else if(layer4 == 4 and layer5 == 12) - { - return RMSE < 1.105f; - } - else if(layer4 == 7 and layer5 == 13) - { - return RMSE < 0.775f; - } - else if(layer4 == 12 and layer5 == 13) - { - return RMSE < 0.625f; - } - } - else if(layer1 == 1 and layer2 == 2 and layer3 == 7) - { - if(layer4 == 8 and layer5 == 14) - { - return RMSE < 0.835f; - } - else if(layer4 == 13 and layer5 == 14) - { - return RMSE < 0.575f; - } - } - else if(layer1 == 1 and layer2 == 7 and layer3 == 8 and layer4 == 9 and layer5 == 15) - { - return RMSE < 0.825f; - } - else if(layer1 == 2 and layer2 == 3 and layer3 == 4) - { - if(layer4 == 5 and layer5 == 6) - { - return RMSE < 0.845f; - } - else if(layer4 == 5 and layer5 == 12) - { - return RMSE < 1.365f; - } - - else if(layer4 == 12 and layer5 == 13) - { - return RMSE < 0.675f; - } - } - else if(layer1 == 2 and layer2 == 3 and layer3 == 7 and layer4 == 13 and layer5 == 14) - { - return RMSE < 0.495f; - } - else if(layer1 == 2 and layer2 == 3 and layer3 == 12 and layer4 == 13 and layer5 == 14) - { - return RMSE < 0.695f; - } - else if(layer1 == 2 and layer2 == 7 and layer3 == 8) - { - if(layer4 == 9 and layer5 == 15) - { - return RMSE < 0.735f; - } - else if(layer4 == 14 and layer5 == 15) - { - return RMSE < 0.525f; - } - } - else if(layer1 == 2 and layer2 == 7 and layer3 == 13 and layer4 == 14 and layer5 == 15) - { - return RMSE < 0.665f; - } - else if(layer1 == 3 and layer2 == 4 and layer3 == 5 and layer4 == 12 and layer5 == 13) - { - return RMSE < 0.995f; - } - else if(layer1 == 3 and layer2 == 4 and layer3 == 12 and layer4 == 13 and layer5 == 14) - { - return RMSE < 0.525f; - } - else if(layer1 == 3 and layer2 == 7 and layer3 == 8 and layer4 == 14 and layer5 == 15) - { - return RMSE < 0.525f; - } - else if(layer1 == 3 and layer2 == 7 and layer3 == 13 and layer4 == 14 and layer5 == 15) - { - return RMSE < 0.745f; - } - 
else if(layer1 == 3 and layer2 == 12 and layer3 == 13 and layer4 == 14 and layer5 == 15) - { - return RMSE < 0.555f; - } - else if(layer1 == 7 and layer2 == 8 and layer3 == 9 and layer4 == 15 and layer5 == 16) - { - return RMSE < 0.525f; - } - else if(layer1 == 7 and layer2 == 8 and layer3 == 14 and layer4 == 15 and layer5 == 16) - { - return RMSE < 0.885f; - } - else if(layer1 == 7 and layer2 == 13 and layer3 == 14 and layer4 == 15 and layer5 == 16) - { - return RMSE < 0.845f; - } - - return true; -} - -ALPAKA_FN_ACC bool SDL::checkIntervalOverlap(const float& firstMin, const float& firstMax, const float& secondMin, const float& secondMax) -{ - return ((firstMin <= secondMin) & (secondMin < firstMax)) | ((secondMin < firstMin) & (firstMin < secondMax)); -} - -/*bounds for high Pt taken from : http://uaf-10.t2.ucsd.edu/~bsathian/SDL/T5_efficiency/efficiencies/new_efficiencies/efficiencies_20210513_T5_recovering_high_Pt_efficiencies/highE_radius_matching/highE_bounds.txt */ - -ALPAKA_FN_ACC bool SDL::matchRadiiBBBBB(const float& innerRadius, const float& bridgeRadius, const float& outerRadius, float& innerInvRadiusMin, float& innerInvRadiusMax, float& bridgeInvRadiusMin, float& bridgeInvRadiusMax, float& outerInvRadiusMin, float& outerInvRadiusMax) -{ - float innerInvRadiusErrorBound = 0.1512f; - float bridgeInvRadiusErrorBound = 0.1781f; - float outerInvRadiusErrorBound = 0.1840f; - - if(innerRadius > 2.0f/(2.f * k2Rinv1GeVf)) - { - innerInvRadiusErrorBound = 0.4449f; - bridgeInvRadiusErrorBound = 0.4033f; - outerInvRadiusErrorBound = 0.8016f; - } - - innerInvRadiusMax = (1.f + innerInvRadiusErrorBound) / innerRadius; - innerInvRadiusMin = fmaxf(0.f, (1.f - innerInvRadiusErrorBound) / innerRadius); - - bridgeInvRadiusMax = (1.f + bridgeInvRadiusErrorBound) / bridgeRadius; - bridgeInvRadiusMin = fmaxf(0.f, (1.f - bridgeInvRadiusErrorBound) / bridgeRadius); - - outerInvRadiusMax = (1.f + outerInvRadiusErrorBound) / outerRadius; - outerInvRadiusMin = fmaxf(0.f, (1.f - 
outerInvRadiusErrorBound) / outerRadius); - - return checkIntervalOverlap(innerInvRadiusMin, innerInvRadiusMax, bridgeInvRadiusMin, bridgeInvRadiusMax); -} - -ALPAKA_FN_ACC bool SDL::matchRadiiBBBBE(const float& innerRadius, const float& bridgeRadius, const float& outerRadius, const float& innerRadiusMin2S, const float& innerRadiusMax2S, const float& bridgeRadiusMin2S, const float& bridgeRadiusMax2S, const float& outerRadiusMin2S, const float& outerRadiusMax2S, float& innerInvRadiusMin, float& innerInvRadiusMax, float& bridgeInvRadiusMin, float& bridgeInvRadiusMax, float& outerInvRadiusMin, float& outerInvRadiusMax) -{ - - float innerInvRadiusErrorBound = 0.1781f; - float bridgeInvRadiusErrorBound = 0.2167f; - float outerInvRadiusErrorBound = 1.1116f; - - if(innerRadius > 2.0f/(2.f * k2Rinv1GeVf)) - { - innerInvRadiusErrorBound = 0.4750f; - bridgeInvRadiusErrorBound = 0.3903f; - outerInvRadiusErrorBound = 15.2120f; - } - - innerInvRadiusMax = (1.f + innerInvRadiusErrorBound) / innerRadius; - innerInvRadiusMin = fmaxf(0.f, (1.f - innerInvRadiusErrorBound) / innerRadius); - - bridgeInvRadiusMax = (1.f + bridgeInvRadiusErrorBound) / bridgeRadius; - bridgeInvRadiusMin = fmaxf(0.f, (1.f - bridgeInvRadiusErrorBound) / bridgeRadius); - - outerInvRadiusMax = (1.f + outerInvRadiusErrorBound) / outerRadius; - outerInvRadiusMin = fmaxf(0.f, (1.f - outerInvRadiusErrorBound) / outerRadius); - - return checkIntervalOverlap(innerInvRadiusMin, innerInvRadiusMax, bridgeInvRadiusMin, bridgeInvRadiusMax); -} - -ALPAKA_FN_ACC bool SDL::matchRadiiBBBEE12378(const float& innerRadius, const float& bridgeRadius, const float& outerRadius, const float& innerRadiusMin2S, const float& innerRadiusMax2S, const float& bridgeRadiusMin2S, const float& bridgeRadiusMax2S, const float& outerRadiusMin2S, const float& outerRadiusMax2S, float& innerInvRadiusMin, float& innerInvRadiusMax, float& bridgeInvRadiusMin, float& bridgeInvRadiusMax, float& outerInvRadiusMin, float& outerInvRadiusMax) -{ - float 
innerInvRadiusErrorBound = 0.178f; - float bridgeInvRadiusErrorBound = 0.507f; - float outerInvRadiusErrorBound = 7.655f; - - innerInvRadiusMax = (1.f + innerInvRadiusErrorBound) / innerRadius; - innerInvRadiusMin = fmaxf(0.f, (1.f - innerInvRadiusErrorBound) / innerRadius); - - bridgeInvRadiusMax = (1.f + bridgeInvRadiusErrorBound) / bridgeRadius; - bridgeInvRadiusMin = fmaxf(0.f, (1.f - bridgeInvRadiusErrorBound) / bridgeRadius); - - outerInvRadiusMax = (1.f + outerInvRadiusErrorBound) / outerRadius; - outerInvRadiusMin = fmaxf(0.f, (1.f - outerInvRadiusErrorBound) / outerRadius); - - return checkIntervalOverlap(innerInvRadiusMin, innerInvRadiusMax, fminf(bridgeInvRadiusMin, 1.0f/bridgeRadiusMax2S), fmaxf(bridgeInvRadiusMax, 1.0f/bridgeRadiusMin2S)); -} - -ALPAKA_FN_ACC bool SDL::matchRadiiBBBEE23478(const float& innerRadius, const float& bridgeRadius, const float& outerRadius, const float& innerRadiusMin2S, const float& innerRadiusMax2S, const float& bridgeRadiusMin2S, const float& bridgeRadiusMax2S, const float& outerRadiusMin2S, const float& outerRadiusMax2S, float& innerInvRadiusMin, float& innerInvRadiusMax, float& bridgeInvRadiusMin, float& bridgeInvRadiusMax, float& outerInvRadiusMin, float& outerInvRadiusMax) -{ - float innerInvRadiusErrorBound = 0.2097f; - float bridgeInvRadiusErrorBound = 0.8557f; - float outerInvRadiusErrorBound = 24.0450f; - - innerInvRadiusMax = (1.f + innerInvRadiusErrorBound) / innerRadius; - innerInvRadiusMin = fmaxf(0.f, (1.f - innerInvRadiusErrorBound) / innerRadius); - - bridgeInvRadiusMax = (1.f + bridgeInvRadiusErrorBound) / bridgeRadius; - bridgeInvRadiusMin = fmaxf(0.f, (1.f - bridgeInvRadiusErrorBound) / bridgeRadius); - - outerInvRadiusMax = (1.f + outerInvRadiusErrorBound) / outerRadius; - outerInvRadiusMin = fmaxf(0.f, (1.f - outerInvRadiusErrorBound) / outerRadius); - - return checkIntervalOverlap(innerInvRadiusMin, innerInvRadiusMax, fminf(bridgeInvRadiusMin, 1.0f/bridgeRadiusMax2S), fmaxf(bridgeInvRadiusMax, 
1.0f/bridgeRadiusMin2S)); - -} - -ALPAKA_FN_ACC bool SDL::matchRadiiBBBEE34578(const float& innerRadius, const float& bridgeRadius, const float& outerRadius, const float& innerRadiusMin2S, const float& innerRadiusMax2S, const float& bridgeRadiusMin2S, const float& bridgeRadiusMax2S, const float& outerRadiusMin2S, const float& outerRadiusMax2S, float& innerInvRadiusMin, float& innerInvRadiusMax, float& bridgeInvRadiusMin, float& bridgeInvRadiusMax, float& outerInvRadiusMin, float& outerInvRadiusMax) -{ - float innerInvRadiusErrorBound = 0.066f; - float bridgeInvRadiusErrorBound = 0.617f; - float outerInvRadiusErrorBound = 2.688f; - - innerInvRadiusMax = (1.f + innerInvRadiusErrorBound) / innerRadius; - innerInvRadiusMin = fmaxf(0.f, (1.f - innerInvRadiusErrorBound) / innerRadius); - - bridgeInvRadiusMax = (1.f + bridgeInvRadiusErrorBound) / bridgeRadius; - bridgeInvRadiusMin = fmaxf(0.f, (1.f - bridgeInvRadiusErrorBound) / bridgeRadius); - - outerInvRadiusMax = (1.f + outerInvRadiusErrorBound) / outerRadius; - outerInvRadiusMin = fmaxf(0.f, (1.f - outerInvRadiusErrorBound) / outerRadius); - - return checkIntervalOverlap(innerInvRadiusMin, innerInvRadiusMax, fminf(bridgeInvRadiusMin, 1.0f/bridgeRadiusMax2S), fmaxf(bridgeInvRadiusMax, 1.0f/bridgeRadiusMin2S)); - -} - -ALPAKA_FN_ACC bool SDL::matchRadiiBBBEE(const float& innerRadius, const float& bridgeRadius, const float& outerRadius, const float& innerRadiusMin2S, const float& innerRadiusMax2S, const float& bridgeRadiusMin2S, const float& bridgeRadiusMax2S, const float& outerRadiusMin2S, const float& outerRadiusMax2S, float& innerInvRadiusMin, float& innerInvRadiusMax, float& bridgeInvRadiusMin, float& bridgeInvRadiusMax, float& outerInvRadiusMin, float& outerInvRadiusMax) -{ - - float innerInvRadiusErrorBound = 0.1840f; - float bridgeInvRadiusErrorBound = 0.5971f; - float outerInvRadiusErrorBound = 11.7102f; - - if(innerRadius > 2.0f/(2.f * k2Rinv1GeVf)) //as good as no selections - { - innerInvRadiusErrorBound = 
1.0412f; - outerInvRadiusErrorBound = 32.2737f; - bridgeInvRadiusErrorBound = 10.9688f; - } - - innerInvRadiusMax = (1.f + innerInvRadiusErrorBound) / innerRadius; - innerInvRadiusMin = fmaxf(0.f, (1.f - innerInvRadiusErrorBound) / innerRadius); - - bridgeInvRadiusMax = (1.f + bridgeInvRadiusErrorBound) / bridgeRadius; - bridgeInvRadiusMin = fmaxf(0.f, (1.f - bridgeInvRadiusErrorBound) / bridgeRadius); - - outerInvRadiusMax = (1.f + outerInvRadiusErrorBound) / outerRadius; - outerInvRadiusMin = fmaxf(0.f, (1.f - outerInvRadiusErrorBound) / outerRadius); - - return checkIntervalOverlap(innerInvRadiusMin, innerInvRadiusMax, fminf(bridgeInvRadiusMin, 1.0f/bridgeRadiusMax2S), fmaxf(bridgeInvRadiusMax, 1.0f/bridgeRadiusMin2S)); - -} - -ALPAKA_FN_ACC bool SDL::matchRadiiBBEEE(const float& innerRadius, const float& bridgeRadius, const float& outerRadius, const float& innerRadiusMin2S, const float& innerRadiusMax2S, const float& bridgeRadiusMin2S, const float& bridgeRadiusMax2S, const float& outerRadiusMin2S, const float& outerRadiusMax2S, float& innerInvRadiusMin, float& innerInvRadiusMax, float& bridgeInvRadiusMin, float& bridgeInvRadiusMax, float& outerInvRadiusMin, float& outerInvRadiusMax) -{ - float innerInvRadiusErrorBound = 0.6376f; - float bridgeInvRadiusErrorBound = 2.1381f; - float outerInvRadiusErrorBound = 20.4179f; - - if(innerRadius > 2.0f/(2.f * k2Rinv1GeVf)) //as good as no selections! 
- { - innerInvRadiusErrorBound = 12.9173f; - outerInvRadiusErrorBound = 25.6702f; - bridgeInvRadiusErrorBound = 5.1700f; - } - - innerInvRadiusMax = (1.f + innerInvRadiusErrorBound) / innerRadius; - innerInvRadiusMin = fmaxf(0.f, (1.f - innerInvRadiusErrorBound) / innerRadius); - - bridgeInvRadiusMax = (1.f + bridgeInvRadiusErrorBound) / bridgeRadius; - bridgeInvRadiusMin = fmaxf(0.f, (1.f - bridgeInvRadiusErrorBound) / bridgeRadius); - - outerInvRadiusMax = (1.f + outerInvRadiusErrorBound) / outerRadius; - outerInvRadiusMin = fmaxf(0.f, (1.f - outerInvRadiusErrorBound) / outerRadius); - - return checkIntervalOverlap(innerInvRadiusMin, innerInvRadiusMax, fminf(bridgeInvRadiusMin, 1.0f/bridgeRadiusMax2S), fmaxf(bridgeInvRadiusMax, 1.0f/bridgeRadiusMin2S)); - -} - -ALPAKA_FN_ACC bool SDL::matchRadiiBEEEE(const float& innerRadius, const float& bridgeRadius, const float& outerRadius, const float& innerRadiusMin2S, const float& innerRadiusMax2S, const float& bridgeRadiusMin2S, const float& bridgeRadiusMax2S, const float& outerRadiusMin2S, const float& outerRadiusMax2S, float& innerInvRadiusMin, float& innerInvRadiusMax, float& bridgeInvRadiusMin, float& bridgeInvRadiusMax, float& outerInvRadiusMin, float& outerInvRadiusMax) -{ - - float innerInvRadiusErrorBound = 1.9382f; - float bridgeInvRadiusErrorBound = 3.7280f; - float outerInvRadiusErrorBound = 5.7030f; - - - if(innerRadius > 2.0f/(2.f * k2Rinv1GeVf)) - { - innerInvRadiusErrorBound = 23.2713f; - outerInvRadiusErrorBound = 24.0450f; - bridgeInvRadiusErrorBound = 21.7980f; - } - - innerInvRadiusMax = (1.f + innerInvRadiusErrorBound) / innerRadius; - innerInvRadiusMin = fmaxf(0.f, (1.f - innerInvRadiusErrorBound) / innerRadius); - - bridgeInvRadiusMax = (1.f + bridgeInvRadiusErrorBound) / bridgeRadius; - bridgeInvRadiusMin = fmaxf(0.f, (1.f - bridgeInvRadiusErrorBound) / bridgeRadius); - - outerInvRadiusMax = (1.f + outerInvRadiusErrorBound) / outerRadius; - outerInvRadiusMin = fmaxf(0.f, (1.f - 
outerInvRadiusErrorBound) / outerRadius); - - return checkIntervalOverlap(fminf(innerInvRadiusMin, 1.0/innerRadiusMax2S), fmaxf(innerInvRadiusMax, 1.0/innerRadiusMin2S), fminf(bridgeInvRadiusMin, 1.0/bridgeRadiusMax2S), fmaxf(bridgeInvRadiusMax, 1.0/bridgeRadiusMin2S)); -} - -ALPAKA_FN_ACC bool SDL::matchRadiiEEEEE(const float& innerRadius, const float& bridgeRadius, const float& outerRadius, const float& innerRadiusMin2S, const float& innerRadiusMax2S, const float& bridgeRadiusMin2S, const float& bridgeRadiusMax2S, const float& outerRadiusMin2S, const float& outerRadiusMax2S, float& innerInvRadiusMin, float& innerInvRadiusMax, float& bridgeInvRadiusMin, float& bridgeInvRadiusMax, float& outerInvRadiusMin, float& outerInvRadiusMax) -{ - float innerInvRadiusErrorBound = 1.9382f; - float bridgeInvRadiusErrorBound = 2.2091f; - float outerInvRadiusErrorBound = 7.4084f; - - if(innerRadius > 2.0f/(2.f * k2Rinv1GeVf)) - { - innerInvRadiusErrorBound = 22.5226f; - bridgeInvRadiusErrorBound = 21.0966f; - outerInvRadiusErrorBound = 19.1252f; - } - - innerInvRadiusMax = (1.f + innerInvRadiusErrorBound) / innerRadius; - innerInvRadiusMin = fmaxf(0.f, (1.f - innerInvRadiusErrorBound) / innerRadius); - - bridgeInvRadiusMax = (1.f + bridgeInvRadiusErrorBound) / bridgeRadius; - bridgeInvRadiusMin = fmaxf(0.f, (1.f - bridgeInvRadiusErrorBound) / bridgeRadius); - - outerInvRadiusMax = (1.f + outerInvRadiusErrorBound) / outerRadius; - outerInvRadiusMin = fmaxf(0.f, (1.f - outerInvRadiusErrorBound) / outerRadius); - - return checkIntervalOverlap(fminf(innerInvRadiusMin, 1.0/innerRadiusMax2S), fmaxf(innerInvRadiusMax, 1.0/innerRadiusMin2S), fminf(bridgeInvRadiusMin, 1.0/bridgeRadiusMax2S), fmaxf(bridgeInvRadiusMax, 1.0/bridgeRadiusMin2S)); -} - -ALPAKA_FN_ACC void SDL::computeErrorInRadius(float* x1Vec, float* y1Vec, float* x2Vec, float* y2Vec, float* x3Vec, float* y3Vec, float& minimumRadius, float& maximumRadius) -{ - //brute force - float candidateRadius; - float g, f; - 
minimumRadius = 123456789.f; - maximumRadius = 0.f; - for(size_t i = 0; i < 3; i++) - { - float x1 = x1Vec[i]; - float y1 = y1Vec[i]; - for(size_t j = 0; j < 3; j++) - { - float x2 = x2Vec[j]; - float y2 = y2Vec[j]; - for(size_t k = 0; k < 3; k++) - { - float x3 = x3Vec[k]; - float y3 = y3Vec[k]; - candidateRadius = computeRadiusFromThreeAnchorHits(x1, y1, x2, y2, x3, y3, g, f); - maximumRadius = fmaxf(candidateRadius, maximumRadius); - minimumRadius = fminf(candidateRadius, minimumRadius); - } - } - } -} -ALPAKA_FN_ACC float SDL::computeRadiusFromThreeAnchorHits(float x1, float y1, float x2, float y2, float x3, float y3, float& g, float& f) -{ - float radius = 0.f; - - //writing manual code for computing radius, which obviously sucks - //TODO:Use fancy inbuilt libraries like cuBLAS or cuSOLVE for this! - //(g,f) -> center - //first anchor hit - (x1,y1), second anchor hit - (x2,y2), third anchor hit - (x3, y3) - - /* - if((y1 - y3) * (x2 - x3) - (x1 - x3) * (y2 - y3) == 0) - { - return -1; //WTF man three collinear points! - } - */ - - float denomInv = 1.0f/((y1 - y3) * (x2 - x3) - (x1 - x3) * (y2 - y3)); - - float xy1sqr = x1 * x1 + y1 * y1; - - float xy2sqr = x2 * x2 + y2 * y2; - - float xy3sqr = x3 * x3 + y3 * y3; - - g = 0.5f * ((y3 - y2) * xy1sqr + (y1 - y3) * xy2sqr + (y2 - y1) * xy3sqr) * denomInv; - - f = 0.5f * ((x2 - x3) * xy1sqr + (x3 - x1) * xy2sqr + (x1 - x2) * xy3sqr) * denomInv; - - float c = ((x2 * y3 - x3 * y2) * xy1sqr + (x3 * y1 - x1 * y3) * xy2sqr + (x1 * y2 - x2 * y1) * xy3sqr) * denomInv; - - if(((y1 - y3) * (x2 - x3) - (x1 - x3) * (y2 - y3) == 0) || (g * g + f * f - c < 0)) - { - printf("three collinear points or FATAL! 
r^2 < 0!\n"); - radius = -1.f; - } - else - radius = sqrtf(g * g + f * f - c); - - return radius; -} - -ALPAKA_FN_ACC bool SDL::T5HasCommonMiniDoublet(struct SDL::triplets& tripletsInGPU, struct SDL::segments& segmentsInGPU, unsigned int innerTripletIndex, unsigned int outerTripletIndex) -{ - unsigned int innerOuterSegmentIndex = tripletsInGPU.segmentIndices[2 * innerTripletIndex + 1]; - unsigned int outerInnerSegmentIndex = tripletsInGPU.segmentIndices[2 * outerTripletIndex]; - unsigned int innerOuterOuterMiniDoubletIndex = segmentsInGPU.mdIndices[2 * innerOuterSegmentIndex + 1]; //inner triplet outer segment outer MD index - unsigned int outerInnerInnerMiniDoubletIndex = segmentsInGPU.mdIndices[2 * outerInnerSegmentIndex]; //outer triplet inner segmnet inner MD index - - - return (innerOuterOuterMiniDoubletIndex == outerInnerInnerMiniDoubletIndex); -} - -ALPAKA_FN_ACC void SDL::computeSigmasForRegression(SDL::modules& modulesInGPU, const uint16_t* lowerModuleIndices, float* delta1, float* delta2, float* slopes, bool* isFlat, int nPoints, bool anchorHits) -{ - /*bool anchorHits required to deal with a weird edge case wherein - the hits ultimately used in the regression are anchor hits, but the - lower modules need not all be Pixel Modules (in case of PS). Similarly, - when we compute the chi squared for the non-anchor hits, the "partner module" - need not always be a PS strip module, but all non-anchor hits sit on strip - modules. 
- */ - ModuleType moduleType; - short moduleSubdet, moduleSide; - float inv1 = 0.01f/0.009f; - float inv2 = 0.15f/0.009f; - float inv3 = 2.4f/0.009f; - for(size_t i=0; i 0 and ys[i] > 0) - { - angleM = 0.5f*float(M_PI) - absArctanSlope; - } - else if(xs[i] < 0 and ys[i] > 0) - { - angleM = absArctanSlope + 0.5f*float(M_PI); - } - else if(xs[i] < 0 and ys[i] < 0) - { - angleM = -(absArctanSlope + 0.5f*float(M_PI)); - } - else if(xs[i] > 0 and ys[i] < 0) - { - angleM = -(0.5f*float(M_PI) - absArctanSlope); - } - - if(not isFlat[i]) - { - xPrime = xs[i] * cosf(angleM) + ys[i] * sinf(angleM); - yPrime = ys[i] * cosf(angleM) - xs[i] * sinf(angleM); - } - else - { - xPrime = xs[i]; - yPrime = ys[i]; - } - sigmas[i] = 2 * sqrtf((xPrime * delta1[i]) * (xPrime * delta1[i]) + (yPrime * delta2[i]) * (yPrime * delta2[i])); - - sigmaX1Squared += (xs[i] * xs[i])/(sigmas[i] * sigmas[i]); - sigmaX2Squared += (ys[i] * ys[i])/(sigmas[i] * sigmas[i]); - sigmaX1X2 += (xs[i] * ys[i])/(sigmas[i] * sigmas[i]); - sigmaX1y += (xs[i] * (xs[i] * xs[i] + ys[i] * ys[i]))/(sigmas[i] * sigmas[i]); - sigmaX2y += (ys[i] * (xs[i] * xs[i] + ys[i] * ys[i]))/(sigmas[i] * sigmas[i]); - sigmaY += (xs[i] * xs[i] + ys[i] * ys[i])/(sigmas[i] * sigmas[i]); - sigmaX1 += xs[i]/(sigmas[i] * sigmas[i]); - sigmaX2 += ys[i]/(sigmas[i] * sigmas[i]); - sigmaOne += 1.0f/(sigmas[i] * sigmas[i]); - } - float denominator = (sigmaX1X2 - sigmaX1 * sigmaX2) * (sigmaX1X2 - sigmaX1 * sigmaX2) - (sigmaX1Squared - sigmaX1 * sigmaX1) * (sigmaX2Squared - sigmaX2 * sigmaX2); - - float twoG = ((sigmaX2y - sigmaX2 * sigmaY) * (sigmaX1X2 - sigmaX1 * sigmaX2) - (sigmaX1y - sigmaX1 * sigmaY) * (sigmaX2Squared - sigmaX2 * sigmaX2)) / denominator; - float twoF = ((sigmaX1y - sigmaX1 * sigmaY) * (sigmaX1X2 - sigmaX1 * sigmaX2) - (sigmaX2y - sigmaX2 * sigmaY) * (sigmaX1Squared - sigmaX1 * sigmaX1)) / denominator; - - float c = -(sigmaY - twoG * sigmaX1 - twoF * sigmaX2)/sigmaOne; - g = 0.5f*twoG; - f = 0.5f*twoF; - if(g * g + f * f - c < 
0) - { - printf("FATAL! r^2 < 0!\n"); - return -1; - } - - radius = sqrtf(g * g + f * f - c); - //compute chi squared - chiSquared = 0.f; - for(size_t i = 0; i < nPoints; i++) - { - chiSquared += (xs[i] * xs[i] + ys[i] * ys[i] - twoG * xs[i] - twoF * ys[i] + c) * (xs[i] * xs[i] + ys[i] * ys[i] - twoG * xs[i] - twoF * ys[i] + c) / (sigmas[i] * sigmas[i]); - } - return radius; -} - -ALPAKA_FN_ACC float SDL::computeChiSquared(int nPoints, float* xs, float* ys, float* delta1, float* delta2, float* slopes, bool* isFlat, float g, float f, float radius) -{ - // given values of (g, f, radius) and a set of points (and its uncertainties) - //compute chi squared - float c = g*g + f*f - radius*radius; - float chiSquared = 0.f; - float absArctanSlope, angleM, xPrime, yPrime, sigma; - for(size_t i = 0; i < nPoints; i++) - { - absArctanSlope = ((slopes[i] != SDL::SDL_INF) ? fabs(atanf(slopes[i])) : 0.5f*float(M_PI)); - if(xs[i] > 0 and ys[i] > 0) - { - angleM = 0.5f*float(M_PI) - absArctanSlope; - } - else if(xs[i] < 0 and ys[i] > 0) - { - angleM = absArctanSlope + 0.5f*float(M_PI); - } - else if(xs[i] < 0 and ys[i] < 0) - { - angleM = -(absArctanSlope + 0.5f*float(M_PI)); - } - else if(xs[i] > 0 and ys[i] < 0) - { - angleM = -(0.5f*float(M_PI) - absArctanSlope); - } - - if(not isFlat[i]) - { - xPrime = xs[i] * cosf(angleM) + ys[i] * sinf(angleM); - yPrime = ys[i] * cosf(angleM) - xs[i] * sinf(angleM); - } - else - { - xPrime = xs[i]; - yPrime = ys[i]; - } - sigma = 2 * sqrtf((xPrime * delta1[i]) * (xPrime * delta1[i]) + (yPrime * delta2[i]) * (yPrime * delta2[i])); - chiSquared += (xs[i] * xs[i] + ys[i] * ys[i] - 2 * g * xs[i] - 2 * f * ys[i] + c) * (xs[i] * xs[i] + ys[i] * ys[i] - 2 * g * xs[i] - 2 * f * ys[i] + c) / (sigma * sigma); - } - return chiSquared; -} - -__global__ void SDL::createQuintupletsInGPUv2(struct SDL::modules& modulesInGPU, struct SDL::miniDoublets& mdsInGPU, struct SDL::segments& segmentsInGPU, struct SDL::triplets& tripletsInGPU, struct SDL::quintuplets& 
quintupletsInGPU, struct SDL::objectRanges& rangesInGPU, uint16_t nEligibleT5Modules) -{ - int gidy = blockIdx.y * blockDim.y + threadIdx.y; - int npy = gridDim.y * blockDim.y; - int gidx = blockIdx.x * blockDim.x + threadIdx.x; - int npx = gridDim.x * blockDim.x; - int gidz = blockIdx.z * blockDim.z + threadIdx.z; - int npz = gridDim.z * blockDim.z; - - for (int iter=gidz; iter < nEligibleT5Modules; iter+=npz){ - uint16_t lowerModule1 = rangesInGPU.indicesOfEligibleT5Modules[iter]; - - - unsigned int nInnerTriplets = tripletsInGPU.nTriplets[lowerModule1]; - for( unsigned int innerTripletArrayIndex =gidy; innerTripletArrayIndex < nInnerTriplets; innerTripletArrayIndex+=npy){ - - unsigned int innerTripletIndex = rangesInGPU.tripletModuleIndices[lowerModule1] + innerTripletArrayIndex; - uint16_t lowerModule2 = tripletsInGPU.lowerModuleIndices[3 * innerTripletIndex + 1]; - uint16_t lowerModule3 = tripletsInGPU.lowerModuleIndices[3 * innerTripletIndex + 2]; - unsigned int nOuterTriplets = tripletsInGPU.nTriplets[lowerModule3]; - for (int outerTripletArrayIndex=gidx; outerTripletArrayIndex < nOuterTriplets; outerTripletArrayIndex+=npx) - { - unsigned int outerTripletIndex = rangesInGPU.tripletModuleIndices[lowerModule3] + outerTripletArrayIndex; - uint16_t lowerModule4 = tripletsInGPU.lowerModuleIndices[3 * outerTripletIndex + 1]; - uint16_t lowerModule5 = tripletsInGPU.lowerModuleIndices[3 * outerTripletIndex + 2]; - - float innerRadius, outerRadius, bridgeRadius, regressionG, regressionF, regressionRadius, rzChiSquared, chiSquared, nonAnchorChiSquared; //required for making distributions - - bool success = runQuintupletDefaultAlgo(modulesInGPU, mdsInGPU, segmentsInGPU, tripletsInGPU, lowerModule1, lowerModule2, lowerModule3, lowerModule4, lowerModule5, innerTripletIndex, outerTripletIndex, innerRadius, outerRadius, bridgeRadius, regressionG, regressionF, regressionRadius, rzChiSquared, chiSquared, nonAnchorChiSquared); - - if(success) - { - short layer2_adjustment; - 
int layer = modulesInGPU.layers[lowerModule1]; - if(layer == 1) - { - layer2_adjustment = 1; - } //get upper segment to be in second layer - else if(layer == 2) - { - layer2_adjustment = 0; - } // get lower segment to be in second layer - else - { - return; - } // ignore anything else TODO: move this to start, before object is made (faster) - unsigned int totOccupancyQuintuplets = atomicAdd(&quintupletsInGPU.totOccupancyQuintuplets[lowerModule1], 1); - if(totOccupancyQuintuplets >= (rangesInGPU.quintupletModuleIndices[lowerModule1 + 1] - rangesInGPU.quintupletModuleIndices[lowerModule1])) - { -#ifdef Warnings - printf("Quintuplet excess alert! Module index = %d\n", lowerModule1); -#endif - } - else - { - unsigned int quintupletModuleIndex = atomicAdd(&quintupletsInGPU.nQuintuplets[lowerModule1], 1); - //this if statement should never get executed! - if(rangesInGPU.quintupletModuleIndices[lowerModule1] == -1) - { - printf("Quintuplets : no memory for module at module index = %d\n", lowerModule1); - } - else - { - unsigned int quintupletIndex = rangesInGPU.quintupletModuleIndices[lowerModule1] + quintupletModuleIndex; - float phi = mdsInGPU.anchorPhi[segmentsInGPU.mdIndices[2*tripletsInGPU.segmentIndices[2*innerTripletIndex+layer2_adjustment]]]; - float eta = mdsInGPU.anchorEta[segmentsInGPU.mdIndices[2*tripletsInGPU.segmentIndices[2*innerTripletIndex+layer2_adjustment]]]; - float pt = (innerRadius+outerRadius)*3.8f*1.602f/(2*100*5.39f); - float scores = chiSquared + nonAnchorChiSquared; - addQuintupletToMemory(tripletsInGPU, quintupletsInGPU, innerTripletIndex, outerTripletIndex, lowerModule1, lowerModule2, lowerModule3, lowerModule4, lowerModule5, innerRadius, bridgeRadius, outerRadius, regressionG, regressionF, regressionRadius, rzChiSquared, chiSquared, nonAnchorChiSquared, pt,eta,phi,scores,layer,quintupletIndex); - - tripletsInGPU.partOfT5[quintupletsInGPU.tripletIndices[2 * quintupletIndex]] = true; - tripletsInGPU.partOfT5[quintupletsInGPU.tripletIndices[2 * 
quintupletIndex + 1]] = true; - } - } - } - } - } - } -} - -ALPAKA_FN_ACC bool SDL::runQuintupletDefaultAlgoBBBB(struct SDL::modules& modulesInGPU, struct SDL::miniDoublets& mdsInGPU, struct SDL::segments& segmentsInGPU, uint16_t& innerInnerLowerModuleIndex, uint16_t& innerOuterLowerModuleIndex, uint16_t& outerInnerLowerModuleIndex, uint16_t& outerOuterLowerModuleIndex, unsigned int& innerSegmentIndex, unsigned int& outerSegmentIndex, unsigned int& firstMDIndex, unsigned int& secondMDIndex, unsigned int& thirdMDIndex, - unsigned int& fourthMDIndex, float& zOut, float& rtOut, float& deltaPhiPos, float& dPhi, float& betaIn, float& - betaOut, float& pt_beta, float& zLo, float& zHi, float& zLoPointed, float& zHiPointed, float& sdlCut, float& betaInCut, float& betaOutCut, float& deltaBetaCut) -{ - bool pass = true; - - bool isPS_InLo = (modulesInGPU.moduleType[innerInnerLowerModuleIndex] == SDL::PS); - bool isPS_OutLo = (modulesInGPU.moduleType[outerInnerLowerModuleIndex] == SDL::PS); - - float rt_InLo = mdsInGPU.anchorRt[firstMDIndex]; - float rt_InOut = mdsInGPU.anchorRt[secondMDIndex]; - float rt_OutLo = mdsInGPU.anchorRt[thirdMDIndex]; - - float z_InLo = mdsInGPU.anchorZ[firstMDIndex]; - float z_InOut = mdsInGPU.anchorZ[secondMDIndex]; - float z_OutLo = mdsInGPU.anchorZ[thirdMDIndex]; - - float alpha1GeV_OutLo = asinf(fminf(rt_OutLo * SDL::k2Rinv1GeVf / SDL::ptCut, SDL::sinAlphaMax)); - - float rtRatio_OutLoInLo = rt_OutLo / rt_InLo; // Outer segment beginning rt divided by inner segment beginning rt; - float dzDrtScale = tanf(alpha1GeV_OutLo) / alpha1GeV_OutLo; // The track can bend in r-z plane slightly - float zpitch_InLo = (isPS_InLo ? SDL::pixelPSZpitch : SDL::strip2SZpitch); - float zpitch_OutLo = (isPS_OutLo ? SDL::pixelPSZpitch : SDL::strip2SZpitch); - - zHi = z_InLo + (z_InLo + SDL::deltaZLum) * (rtRatio_OutLoInLo - 1.f) * (z_InLo < 0.f ? 
1.f : dzDrtScale) + (zpitch_InLo + zpitch_OutLo); - zLo = z_InLo + (z_InLo - SDL::deltaZLum) * (rtRatio_OutLoInLo - 1.f) * (z_InLo > 0.f ? 1.f : dzDrtScale) - (zpitch_InLo + zpitch_OutLo); - - - //Cut 1 - z compatibility - zOut = z_OutLo; - rtOut = rt_OutLo; - pass = pass and ((z_OutLo >= zLo) & (z_OutLo <= zHi)); - if(not pass) return pass; - - float drt_OutLo_InLo = (rt_OutLo - rt_InLo); - float r3_InLo = sqrtf(z_InLo * z_InLo + rt_InLo * rt_InLo); - float drt_InSeg = rt_InOut - rt_InLo; - float dz_InSeg = z_InOut - z_InLo; - float dr3_InSeg = sqrtf(rt_InOut * rt_InOut + z_InOut * z_InOut) - sqrtf(rt_InLo * rt_InLo + z_InLo * z_InLo); - - float coshEta = dr3_InSeg/drt_InSeg; - float dzErr = (zpitch_InLo + zpitch_OutLo) * (zpitch_InLo + zpitch_OutLo) * 2.f; - - float sdlThetaMulsF = 0.015f * sqrtf(0.1f + 0.2f * (rt_OutLo - rt_InLo) / 50.f) * sqrtf(r3_InLo / rt_InLo); - float sdlMuls = sdlThetaMulsF * 3.f / SDL::ptCut * 4.f; // will need a better guess than x4? - dzErr += sdlMuls * sdlMuls * drt_OutLo_InLo * drt_OutLo_InLo / 3.f * coshEta * coshEta; //sloppy - dzErr = sqrtf(dzErr); - - // Constructing upper and lower bound - const float dzMean = dz_InSeg / drt_InSeg * drt_OutLo_InLo; - const float zWindow = dzErr / drt_InSeg * drt_OutLo_InLo + (zpitch_InLo + zpitch_OutLo); //FIXME for SDL::ptCut lower than ~0.8 need to add curv path correction - zLoPointed = z_InLo + dzMean * (z_InLo > 0.f ? 1.f : dzDrtScale) - zWindow; - zHiPointed = z_InLo + dzMean * (z_InLo < 0.f ? 
1.f : dzDrtScale) + zWindow; - - // Cut #2: Pointed Z (Inner segment two MD points to outer segment inner MD) - pass = pass and ((z_OutLo >= zLoPointed) & (z_OutLo <= zHiPointed)); - if(not pass) return pass; - - float sdlPVoff = 0.1f/rt_OutLo; - sdlCut = alpha1GeV_OutLo + sqrtf(sdlMuls * sdlMuls + sdlPVoff * sdlPVoff); - - deltaPhiPos = SDL::deltaPhi(mdsInGPU.anchorX[secondMDIndex], mdsInGPU.anchorY[secondMDIndex], mdsInGPU.anchorX[fourthMDIndex], mdsInGPU.anchorY[fourthMDIndex]); - // Cut #3: FIXME:deltaPhiPos can be tighter - pass = pass and (fabsf(deltaPhiPos) <= sdlCut); - if(not pass) return pass; - - float midPointX = 0.5f*(mdsInGPU.anchorX[firstMDIndex] + mdsInGPU.anchorX[thirdMDIndex]); - float midPointY = 0.5f* (mdsInGPU.anchorY[firstMDIndex] + mdsInGPU.anchorY[thirdMDIndex]); - float midPointZ = 0.5f*(mdsInGPU.anchorZ[firstMDIndex] + mdsInGPU.anchorZ[thirdMDIndex]); - float diffX = mdsInGPU.anchorX[thirdMDIndex] - mdsInGPU.anchorX[firstMDIndex]; - float diffY = mdsInGPU.anchorY[thirdMDIndex] - mdsInGPU.anchorY[firstMDIndex]; - float diffZ = mdsInGPU.anchorZ[thirdMDIndex] - mdsInGPU.anchorZ[firstMDIndex]; - - dPhi = SDL::deltaPhi(midPointX, midPointY, diffX, diffY); - - // Cut #4: deltaPhiChange - pass = pass and (fabsf(dPhi) <= sdlCut); - //lots of array accesses below. Cut here! 
- if(not pass) return pass; - - // First obtaining the raw betaIn and betaOut values without any correction and just purely based on the mini-doublet hit positions - - float alpha_InLo = __H2F(segmentsInGPU.dPhiChanges[innerSegmentIndex]); - float alpha_OutLo = __H2F(segmentsInGPU.dPhiChanges[outerSegmentIndex]); - - bool isEC_lastLayer = modulesInGPU.subdets[outerOuterLowerModuleIndex] == SDL::Endcap and modulesInGPU.moduleType[outerOuterLowerModuleIndex] == SDL::TwoS; - - float alpha_OutUp,alpha_OutUp_highEdge,alpha_OutUp_lowEdge; - - alpha_OutUp = SDL::deltaPhi(mdsInGPU.anchorX[fourthMDIndex], mdsInGPU.anchorY[fourthMDIndex], mdsInGPU.anchorX[fourthMDIndex] - mdsInGPU.anchorX[thirdMDIndex], mdsInGPU.anchorY[fourthMDIndex] - mdsInGPU.anchorY[thirdMDIndex]); - - alpha_OutUp_highEdge = alpha_OutUp; - alpha_OutUp_lowEdge = alpha_OutUp; - - float tl_axis_x = mdsInGPU.anchorX[fourthMDIndex] - mdsInGPU.anchorX[firstMDIndex]; - float tl_axis_y = mdsInGPU.anchorY[fourthMDIndex] - mdsInGPU.anchorY[firstMDIndex]; - float tl_axis_z = mdsInGPU.anchorZ[fourthMDIndex] - mdsInGPU.anchorZ[firstMDIndex]; - float tl_axis_highEdge_x = tl_axis_x; - float tl_axis_highEdge_y = tl_axis_y; - float tl_axis_lowEdge_x = tl_axis_x; - float tl_axis_lowEdge_y = tl_axis_y; - - betaIn = alpha_InLo - SDL::deltaPhi(mdsInGPU.anchorX[firstMDIndex], mdsInGPU.anchorY[firstMDIndex], tl_axis_x, tl_axis_y); - - float betaInRHmin = betaIn; - float betaInRHmax = betaIn; - betaOut = -alpha_OutUp + SDL::deltaPhi(mdsInGPU.anchorX[fourthMDIndex], mdsInGPU.anchorY[fourthMDIndex], tl_axis_x, tl_axis_y); - - float betaOutRHmin = betaOut; - float betaOutRHmax = betaOut; - - if(isEC_lastLayer) - { - alpha_OutUp_highEdge = SDL::deltaPhi(mdsInGPU.anchorHighEdgeX[fourthMDIndex], mdsInGPU.anchorHighEdgeY[fourthMDIndex], mdsInGPU.anchorHighEdgeX[fourthMDIndex] - mdsInGPU.anchorX[thirdMDIndex], mdsInGPU.anchorHighEdgeY[fourthMDIndex] - mdsInGPU.anchorY[thirdMDIndex]); - alpha_OutUp_lowEdge = 
SDL::deltaPhi(mdsInGPU.anchorLowEdgeX[fourthMDIndex], mdsInGPU.anchorLowEdgeY[fourthMDIndex], mdsInGPU.anchorLowEdgeX[fourthMDIndex] - mdsInGPU.anchorX[thirdMDIndex], mdsInGPU.anchorLowEdgeY[fourthMDIndex] - mdsInGPU.anchorY[thirdMDIndex]); - - tl_axis_highEdge_x = mdsInGPU.anchorHighEdgeX[fourthMDIndex] - mdsInGPU.anchorX[firstMDIndex]; - tl_axis_highEdge_y = mdsInGPU.anchorHighEdgeY[fourthMDIndex] - mdsInGPU.anchorY[firstMDIndex]; - tl_axis_lowEdge_x = mdsInGPU.anchorLowEdgeX[fourthMDIndex] - mdsInGPU.anchorX[firstMDIndex]; - tl_axis_lowEdge_y = mdsInGPU.anchorLowEdgeY[fourthMDIndex] - mdsInGPU.anchorY[firstMDIndex]; - - - betaOutRHmin = -alpha_OutUp_highEdge + SDL::deltaPhi(mdsInGPU.anchorHighEdgeX[fourthMDIndex], mdsInGPU.anchorHighEdgeY[fourthMDIndex], tl_axis_highEdge_x, tl_axis_highEdge_y); - betaOutRHmax = -alpha_OutUp_lowEdge + SDL::deltaPhi(mdsInGPU.anchorLowEdgeX[fourthMDIndex], mdsInGPU.anchorLowEdgeY[fourthMDIndex], tl_axis_lowEdge_x, tl_axis_lowEdge_y); - } - - //beta computation - float drt_tl_axis = sqrtf(tl_axis_x * tl_axis_x + tl_axis_y * tl_axis_y); - float drt_tl_lowEdge = sqrtf(tl_axis_lowEdge_x * tl_axis_lowEdge_x + tl_axis_lowEdge_y * tl_axis_lowEdge_y); - float drt_tl_highEdge = sqrtf(tl_axis_highEdge_x * tl_axis_highEdge_x + tl_axis_highEdge_y * tl_axis_highEdge_y); - - float corrF = 1.f; - //innerOuterAnchor - innerInnerAnchor - const float rt_InSeg = sqrtf((mdsInGPU.anchorX[secondMDIndex] - mdsInGPU.anchorX[firstMDIndex]) * (mdsInGPU.anchorX[secondMDIndex] - mdsInGPU.anchorX[firstMDIndex]) + (mdsInGPU.anchorY[secondMDIndex] - mdsInGPU.anchorY[firstMDIndex]) * (mdsInGPU.anchorY[secondMDIndex] - mdsInGPU.anchorY[firstMDIndex])); - betaInCut = asinf(fminf((-rt_InSeg * corrF + drt_tl_axis) * SDL::k2Rinv1GeVf / SDL::ptCut, SDL::sinAlphaMax)) + (0.02f / drt_InSeg); - - //Cut #5: first beta cut - pass = pass and (fabsf(betaInRHmin) < betaInCut); - if(not pass) return pass; - - float betaAv = 0.5f * (betaIn + betaOut); - pt_beta = drt_tl_axis * 
SDL::k2Rinv1GeVf/sinf(betaAv); - int lIn = 5; - int lOut = isEC_lastLayer ? 11 : 5; - float sdOut_dr = sqrtf((mdsInGPU.anchorX[fourthMDIndex] - mdsInGPU.anchorX[thirdMDIndex]) * (mdsInGPU.anchorX[fourthMDIndex] - mdsInGPU.anchorX[thirdMDIndex]) + (mdsInGPU.anchorY[fourthMDIndex] - mdsInGPU.anchorY[thirdMDIndex]) * (mdsInGPU.anchorY[fourthMDIndex] - mdsInGPU.anchorY[thirdMDIndex])); - float sdOut_d = mdsInGPU.anchorRt[fourthMDIndex] - mdsInGPU.anchorRt[thirdMDIndex]; - - const float diffDr = fabsf(rt_InSeg - sdOut_dr) / fabsf(rt_InSeg + sdOut_dr); - - SDL::runDeltaBetaIterationsT5(betaIn, betaOut, betaAv, pt_beta, rt_InSeg, sdOut_dr, drt_tl_axis, lIn); - - const float betaInMMSF = (fabsf(betaInRHmin + betaInRHmax) > 0) ? (2.f * betaIn / fabsf(betaInRHmin + betaInRHmax)) : 0.f; //mean value of min,max is the old betaIn - const float betaOutMMSF = (fabsf(betaOutRHmin + betaOutRHmax) > 0) ? (2.f * betaOut / fabsf(betaOutRHmin + betaOutRHmax)) : 0.f; - betaInRHmin *= betaInMMSF; - betaInRHmax *= betaInMMSF; - betaOutRHmin *= betaOutMMSF; - betaOutRHmax *= betaOutMMSF; - - const float dBetaMuls = sdlThetaMulsF * 4.f / fminf(fabsf(pt_beta), SDL::pt_betaMax); //need to confimm the range-out value of 7 GeV - - - const float alphaInAbsReg = fmaxf(fabsf(alpha_InLo), asinf(fminf(rt_InLo * SDL::k2Rinv1GeVf / 3.0f, SDL::sinAlphaMax))); - const float alphaOutAbsReg = fmaxf(fabs(alpha_OutLo), asinf(fminf(rt_OutLo * SDL::k2Rinv1GeVf / 3.0f, SDL::sinAlphaMax))); - const float dBetaInLum = lIn < 11 ? 0.0f : fabsf(alphaInAbsReg*SDL::deltaZLum / z_InLo); - const float dBetaOutLum = lOut < 11 ? 
0.0f : fabsf(alphaOutAbsReg*SDL::deltaZLum / z_OutLo); - const float dBetaLum2 = (dBetaInLum + dBetaOutLum) * (dBetaInLum + dBetaOutLum); - const float sinDPhi = sinf(dPhi); - - const float dBetaRIn2 = 0; // TODO-RH - // const float dBetaROut2 = 0; // TODO-RH - float dBetaROut = 0; - if(isEC_lastLayer) - { - dBetaROut = (sqrtf(mdsInGPU.anchorHighEdgeX[fourthMDIndex] * mdsInGPU.anchorHighEdgeX[fourthMDIndex] + mdsInGPU.anchorHighEdgeY[fourthMDIndex] * mdsInGPU.anchorHighEdgeY[fourthMDIndex]) - sqrtf(mdsInGPU.anchorLowEdgeX[fourthMDIndex] * mdsInGPU.anchorLowEdgeX[fourthMDIndex] + mdsInGPU.anchorLowEdgeY[fourthMDIndex] * mdsInGPU.anchorLowEdgeY[fourthMDIndex])) * sinDPhi / drt_tl_axis; - } - - const float dBetaROut2 = dBetaROut * dBetaROut; - - betaOutCut = asinf(fminf(drt_tl_axis*SDL::k2Rinv1GeVf / SDL::ptCut, SDL::sinAlphaMax)) //FIXME: need faster version - + (0.02f / sdOut_d) + sqrtf(dBetaLum2 + dBetaMuls*dBetaMuls); - - //Cut #6: The real beta cut - pass = pass and ((fabsf(betaOut) < betaOutCut)); - if(not pass) return pass; - - float pt_betaIn = drt_tl_axis * SDL::k2Rinv1GeVf/sinf(betaIn); - float pt_betaOut = drt_tl_axis * SDL::k2Rinv1GeVf / sinf(betaOut); - float dBetaRes = 0.02f/fminf(sdOut_d,drt_InSeg); - float dBetaCut2 = (dBetaRes*dBetaRes * 2.0f + dBetaMuls * dBetaMuls + dBetaLum2 + dBetaRIn2 + dBetaROut2 - + 0.25f * (fabsf(betaInRHmin - betaInRHmax) + fabsf(betaOutRHmin - betaOutRHmax)) * (fabsf(betaInRHmin - betaInRHmax) + fabsf(betaOutRHmin - betaOutRHmax))); - - float dBeta = betaIn - betaOut; - deltaBetaCut = sqrtf(dBetaCut2); - pass = pass and (dBeta * dBeta <= dBetaCut2); - - return pass; -} - -ALPAKA_FN_ACC bool SDL::runQuintupletDefaultAlgoBBEE(struct SDL::modules& modulesInGPU, struct SDL::miniDoublets& mdsInGPU, struct SDL::segments& segmentsInGPU, uint16_t& innerInnerLowerModuleIndex, uint16_t& innerOuterLowerModuleIndex, uint16_t& outerInnerLowerModuleIndex, uint16_t& outerOuterLowerModuleIndex, unsigned int& innerSegmentIndex, unsigned int& 
outerSegmentIndex, unsigned int& firstMDIndex, unsigned int& secondMDIndex, unsigned int& thirdMDIndex, - unsigned int& fourthMDIndex, float& zOut, float& rtOut, float& deltaPhiPos, float& dPhi, float& betaIn, float& - betaOut, float& pt_beta, float& zLo, float& rtLo, float& rtHi, float& sdlCut, float& betaInCut, float& betaOutCut, float& deltaBetaCut, float& kZ) -{ - bool pass = true; - bool isPS_InLo = (modulesInGPU.moduleType[innerInnerLowerModuleIndex] == SDL::PS); - bool isPS_OutLo = (modulesInGPU.moduleType[outerInnerLowerModuleIndex] == SDL::PS); - - float rt_InLo = mdsInGPU.anchorRt[firstMDIndex]; - float rt_InOut = mdsInGPU.anchorRt[secondMDIndex]; - float rt_OutLo = mdsInGPU.anchorRt[thirdMDIndex]; - - float z_InLo = mdsInGPU.anchorZ[firstMDIndex]; - float z_InOut = mdsInGPU.anchorZ[secondMDIndex]; - float z_OutLo = mdsInGPU.anchorZ[thirdMDIndex]; - - float alpha1GeV_OutLo = asinf(fminf(rt_OutLo * SDL::k2Rinv1GeVf / SDL::ptCut, SDL::sinAlphaMax)); - - float rtRatio_OutLoInLo = rt_OutLo / rt_InLo; // Outer segment beginning rt divided by inner segment beginning rt; - float dzDrtScale = tanf(alpha1GeV_OutLo) / alpha1GeV_OutLo; // The track can bend in r-z plane slightly - float zpitch_InLo = (isPS_InLo ? SDL::pixelPSZpitch : SDL::strip2SZpitch); - float zpitch_OutLo = (isPS_OutLo ? SDL::pixelPSZpitch : SDL::strip2SZpitch); - float zGeom = zpitch_InLo + zpitch_OutLo; - - zLo = z_InLo + (z_InLo - SDL::deltaZLum) * (rtRatio_OutLoInLo - 1.f) * (z_InLo > 0.f ? 1.f : dzDrtScale) - zGeom; - - // Cut #0: Preliminary (Only here in endcap case) - pass = pass and (z_InLo * z_OutLo > 0); - if(not pass) return pass; - - float dLum = copysignf(SDL::deltaZLum, z_InLo); - bool isOutSgInnerMDPS = modulesInGPU.moduleType[outerInnerLowerModuleIndex] == SDL::PS; - float rtGeom1 = isOutSgInnerMDPS ? 
SDL::pixelPSZpitch : SDL::strip2SZpitch; - float zGeom1 = copysignf(zGeom,z_InLo); - rtLo = rt_InLo * (1.f + (z_OutLo - z_InLo - zGeom1) / (z_InLo + zGeom1 + dLum) / dzDrtScale) - rtGeom1; //slope correction only on the lower end - zOut = z_OutLo; - rtOut = rt_OutLo; - - //Cut #1: rt condition - pass = pass and (rtOut >= rtLo); - if(not pass) return pass; - - float zInForHi = z_InLo - zGeom1 - dLum; - if(zInForHi * z_InLo < 0) - { - zInForHi = copysignf(0.1f,z_InLo); - } - rtHi = rt_InLo * (1.f + (z_OutLo - z_InLo + zGeom1) / zInForHi) + rtGeom1; - - //Cut #2: rt condition - pass = pass and ((rt_OutLo >= rtLo) & (rt_OutLo <= rtHi)); - if(not pass) return pass; - - float rIn = sqrtf(z_InLo * z_InLo + rt_InLo * rt_InLo); - const float drtSDIn = rt_InOut - rt_InLo; - const float dzSDIn = z_InOut - z_InLo; - const float dr3SDIn = sqrtf(rt_InOut * rt_InOut + z_InOut * z_InOut) - sqrtf(rt_InLo * rt_InLo + z_InLo * z_InLo); - - const float coshEta = dr3SDIn / drtSDIn; //direction estimate - const float dzOutInAbs = fabsf(z_OutLo - z_InLo); - const float multDzDr = dzOutInAbs * coshEta / (coshEta * coshEta - 1.f); - const float zGeom1_another = SDL::pixelPSZpitch; - kZ = (z_OutLo - z_InLo) / dzSDIn; - float drtErr = zGeom1_another * zGeom1_another * drtSDIn * drtSDIn / dzSDIn / dzSDIn * (1.f - 2.f * kZ + 2.f * kZ * kZ); - const float sdlThetaMulsF = 0.015f * sqrtf(0.1f + 0.2f * (rt_OutLo - rt_InLo) / 50.f) * sqrtf(rIn / rt_InLo); - const float sdlMuls = sdlThetaMulsF * 3.f / SDL::ptCut * 4.f; //will need a better guess than x4? 
- drtErr += sdlMuls * sdlMuls * multDzDr * multDzDr / 3.f * coshEta * coshEta; //sloppy: relative muls is 1/3 of total muls - drtErr = sqrtf(drtErr); - const float drtMean = drtSDIn * dzOutInAbs / fabsf(dzSDIn); // - const float rtWindow = drtErr + rtGeom1; - const float rtLo_another = rt_InLo + drtMean / dzDrtScale - rtWindow; - const float rtHi_another = rt_InLo + drtMean + rtWindow; - - //Cut #3: rt-z pointed - pass = pass and ((kZ >= 0) & (rtOut >= rtLo) & (rtOut <= rtHi)); - if(not pass) return pass; - - const float sdlPVoff = 0.1f / rt_OutLo; - sdlCut = alpha1GeV_OutLo + sqrtf(sdlMuls * sdlMuls + sdlPVoff*sdlPVoff); - - - deltaPhiPos = SDL::deltaPhi(mdsInGPU.anchorX[secondMDIndex], mdsInGPU.anchorY[secondMDIndex], mdsInGPU.anchorX[fourthMDIndex], mdsInGPU.anchorY[fourthMDIndex]); - - - //Cut #4: deltaPhiPos can be tighter - pass = pass and (fabsf(deltaPhiPos) <= sdlCut); - if(not pass) return pass; - - float midPointX = 0.5f*(mdsInGPU.anchorX[firstMDIndex] + mdsInGPU.anchorX[thirdMDIndex]); - float midPointY = 0.5f* (mdsInGPU.anchorY[firstMDIndex] + mdsInGPU.anchorY[thirdMDIndex]); - float midPointZ = 0.5f*(mdsInGPU.anchorZ[firstMDIndex] + mdsInGPU.anchorZ[thirdMDIndex]); - float diffX = mdsInGPU.anchorX[thirdMDIndex] - mdsInGPU.anchorX[firstMDIndex]; - float diffY = mdsInGPU.anchorY[thirdMDIndex] - mdsInGPU.anchorY[firstMDIndex]; - float diffZ = mdsInGPU.anchorZ[thirdMDIndex] - mdsInGPU.anchorZ[firstMDIndex]; - - dPhi = SDL::deltaPhi(midPointX, midPointY, diffX, diffY); - // Cut #5: deltaPhiChange - pass = pass and (fabsf(dPhi) <= sdlCut); - if(not pass) return pass; - - float sdIn_alpha = __H2F(segmentsInGPU.dPhiChanges[innerSegmentIndex]); - float sdIn_alpha_min = __H2F(segmentsInGPU.dPhiChangeMins[innerSegmentIndex]); - float sdIn_alpha_max = __H2F(segmentsInGPU.dPhiChangeMaxs[innerSegmentIndex]); - float sdOut_alpha = sdIn_alpha; //weird - - float sdOut_alphaOut = SDL::deltaPhi(mdsInGPU.anchorX[fourthMDIndex], mdsInGPU.anchorY[fourthMDIndex], 
mdsInGPU.anchorX[fourthMDIndex] - mdsInGPU.anchorX[thirdMDIndex], mdsInGPU.anchorY[fourthMDIndex] - mdsInGPU.anchorY[thirdMDIndex]); - - float sdOut_alphaOut_min = SDL::phi_mpi_pi(__H2F(segmentsInGPU.dPhiChangeMins[outerSegmentIndex]) - __H2F(segmentsInGPU.dPhiMins[outerSegmentIndex])); - float sdOut_alphaOut_max = SDL::phi_mpi_pi(__H2F(segmentsInGPU.dPhiChangeMaxs[outerSegmentIndex]) - __H2F(segmentsInGPU.dPhiMaxs[outerSegmentIndex])); - - float tl_axis_x = mdsInGPU.anchorX[fourthMDIndex] - mdsInGPU.anchorX[firstMDIndex]; - float tl_axis_y = mdsInGPU.anchorY[fourthMDIndex] - mdsInGPU.anchorY[firstMDIndex]; - float tl_axis_z = mdsInGPU.anchorZ[fourthMDIndex] - mdsInGPU.anchorZ[firstMDIndex]; - - betaIn = sdIn_alpha - SDL::deltaPhi(mdsInGPU.anchorX[firstMDIndex], mdsInGPU.anchorY[firstMDIndex], tl_axis_x, tl_axis_y); - - float betaInRHmin = betaIn; - float betaInRHmax = betaIn; - betaOut = -sdOut_alphaOut + SDL::deltaPhi(mdsInGPU.anchorX[fourthMDIndex], mdsInGPU.anchorY[fourthMDIndex], tl_axis_x, tl_axis_y); - - float betaOutRHmin = betaOut; - float betaOutRHmax = betaOut; - - bool isEC_secondLayer = (modulesInGPU.subdets[innerOuterLowerModuleIndex] == SDL::Endcap) and (modulesInGPU.moduleType[innerOuterLowerModuleIndex] == SDL::TwoS); - - if(isEC_secondLayer) - { - betaInRHmin = betaIn - sdIn_alpha_min + sdIn_alpha; - betaInRHmax = betaIn - sdIn_alpha_max + sdIn_alpha; - } - - betaOutRHmin = betaOut - sdOut_alphaOut_min + sdOut_alphaOut; - betaOutRHmax = betaOut - sdOut_alphaOut_max + sdOut_alphaOut; - - float swapTemp; - if(fabsf(betaOutRHmin) > fabsf(betaOutRHmax)) - { - swapTemp = betaOutRHmin; - betaOutRHmin = betaOutRHmax; - betaOutRHmax = swapTemp; - } - - if(fabsf(betaInRHmin) > fabsf(betaInRHmax)) - { - swapTemp = betaInRHmin; - betaInRHmin = betaInRHmax; - betaInRHmax = swapTemp; - } - - float sdIn_dr = sqrtf((mdsInGPU.anchorX[secondMDIndex] - mdsInGPU.anchorX[firstMDIndex]) * (mdsInGPU.anchorX[secondMDIndex] - mdsInGPU.anchorX[firstMDIndex]) + 
(mdsInGPU.anchorY[secondMDIndex] - mdsInGPU.anchorY[firstMDIndex]) * (mdsInGPU.anchorY[secondMDIndex] - mdsInGPU.anchorY[firstMDIndex])); - float sdIn_d = rt_InOut - rt_InLo; - - float dr = sqrtf(tl_axis_x * tl_axis_x + tl_axis_y * tl_axis_y); - const float corrF = 1.f; - betaInCut = asinf(fminf((-sdIn_dr * corrF + dr) * SDL::k2Rinv1GeVf / SDL::ptCut, SDL::sinAlphaMax)) + (0.02f / sdIn_d); - - //Cut #6: first beta cut - pass = pass and (fabsf(betaInRHmin) < betaInCut); - if(not pass) return pass; - - float betaAv = 0.5f * (betaIn + betaOut); - pt_beta = dr * SDL::k2Rinv1GeVf / sinf(betaAv); - - float lIn = 5; - float lOut = 11; - - float sdOut_dr = sqrtf((mdsInGPU.anchorX[fourthMDIndex] - mdsInGPU.anchorX[thirdMDIndex]) * (mdsInGPU.anchorX[fourthMDIndex] - mdsInGPU.anchorX[thirdMDIndex]) + (mdsInGPU.anchorY[fourthMDIndex] - mdsInGPU.anchorY[thirdMDIndex]) * (mdsInGPU.anchorY[fourthMDIndex] - mdsInGPU.anchorY[thirdMDIndex])); - float sdOut_d = mdsInGPU.anchorRt[fourthMDIndex] - mdsInGPU.anchorRt[thirdMDIndex]; - - SDL::runDeltaBetaIterationsT5(betaIn, betaOut, betaAv, pt_beta, sdIn_dr, sdOut_dr, dr, lIn); - - const float betaInMMSF = (fabsf(betaInRHmin + betaInRHmax) > 0) ? (2.f * betaIn / fabsf(betaInRHmin + betaInRHmax)) : 0.; //mean value of min,max is the old betaIn - const float betaOutMMSF = (fabsf(betaOutRHmin + betaOutRHmax) > 0) ? (2.f * betaOut / fabsf(betaOutRHmin + betaOutRHmax)) : 0.; - betaInRHmin *= betaInMMSF; - betaInRHmax *= betaInMMSF; - betaOutRHmin *= betaOutMMSF; - betaOutRHmax *= betaOutMMSF; - - const float dBetaMuls = sdlThetaMulsF * 4.f / fminf(fabsf(pt_beta), SDL::pt_betaMax); //need to confirm the range-out value of 7 GeV - - const float alphaInAbsReg = fmaxf(fabsf(sdIn_alpha), asinf(fminf(rt_InLo * SDL::k2Rinv1GeVf / 3.0f, SDL::sinAlphaMax))); - const float alphaOutAbsReg = fmaxf(fabsf(sdOut_alpha), asinf(fminf(rt_OutLo * SDL::k2Rinv1GeVf / 3.0f, SDL::sinAlphaMax))); - const float dBetaInLum = lIn < 11 ? 
0.0f : fabsf(alphaInAbsReg*SDL::deltaZLum / z_InLo); - const float dBetaOutLum = lOut < 11 ? 0.0f : fabsf(alphaOutAbsReg*SDL::deltaZLum / z_OutLo); - const float dBetaLum2 = (dBetaInLum + dBetaOutLum) * (dBetaInLum + dBetaOutLum); - const float sinDPhi = sinf(dPhi); - - const float dBetaRIn2 = 0; // TODO-RH - // const float dBetaROut2 = 0; // TODO-RH - float dBetaROut = 0; - if(modulesInGPU.moduleType[outerOuterLowerModuleIndex] == SDL::TwoS) - { - dBetaROut = (sqrtf(mdsInGPU.anchorHighEdgeX[fourthMDIndex] * mdsInGPU.anchorHighEdgeX[fourthMDIndex] + mdsInGPU.anchorHighEdgeY[fourthMDIndex] * mdsInGPU.anchorHighEdgeY[fourthMDIndex]) - sqrtf(mdsInGPU.anchorLowEdgeX[fourthMDIndex] * mdsInGPU.anchorLowEdgeX[fourthMDIndex] + mdsInGPU.anchorLowEdgeY[fourthMDIndex] * mdsInGPU.anchorLowEdgeY[fourthMDIndex])) * sinDPhi / dr; - } - - const float dBetaROut2 = dBetaROut * dBetaROut; - betaOutCut = asinf(fminf(dr*SDL::k2Rinv1GeVf / SDL::ptCut, SDL::sinAlphaMax)) //FIXME: need faster version - + (0.02f / sdOut_d) + sqrtf(dBetaLum2 + dBetaMuls*dBetaMuls); - - //Cut #6: The real beta cut - pass = pass and (fabsf(betaOut) < betaOutCut); - if(not pass) return pass; - - float pt_betaIn = dr * SDL::k2Rinv1GeVf/sinf(betaIn); - float pt_betaOut = dr * SDL::k2Rinv1GeVf / sinf(betaOut); - float dBetaRes = 0.02f/fminf(sdOut_d,sdIn_d); - float dBetaCut2 = (dBetaRes*dBetaRes * 2.0f + dBetaMuls * dBetaMuls + dBetaLum2 + dBetaRIn2 + dBetaROut2 - + 0.25f * (fabsf(betaInRHmin - betaInRHmax) + fabsf(betaOutRHmin - betaOutRHmax)) * (fabsf(betaInRHmin - betaInRHmax) + fabsf(betaOutRHmin - betaOutRHmax))); - float dBeta = betaIn - betaOut; - deltaBetaCut = sqrtf(dBetaCut2); - //Cut #7: Cut on dBet - pass = pass and (dBeta * dBeta <= dBetaCut2); - - return pass; -} - -ALPAKA_FN_ACC bool SDL::runQuintupletDefaultAlgoEEEE(struct SDL::modules& modulesInGPU, struct SDL::miniDoublets& mdsInGPU, struct SDL::segments& segmentsInGPU, uint16_t& innerInnerLowerModuleIndex, uint16_t& innerOuterLowerModuleIndex, 
uint16_t& outerInnerLowerModuleIndex, uint16_t& outerOuterLowerModuleIndex, unsigned int& innerSegmentIndex, unsigned int& outerSegmentIndex, unsigned int& firstMDIndex, unsigned int& secondMDIndex, unsigned int& thirdMDIndex, - unsigned int& fourthMDIndex, float& zOut, float& rtOut, float& deltaPhiPos, float& dPhi, float& betaIn, float& - betaOut, float& pt_beta, float& zLo, float& rtLo, float& rtHi, float& sdlCut, float& betaInCut, float& betaOutCut, float& deltaBetaCut, float& kZ) -{ - bool pass = true; - - bool isPS_InLo = (modulesInGPU.moduleType[innerInnerLowerModuleIndex] == SDL::PS); - bool isPS_OutLo = (modulesInGPU.moduleType[outerInnerLowerModuleIndex] == SDL::PS); - - float rt_InLo = mdsInGPU.anchorRt[firstMDIndex]; - float rt_InOut = mdsInGPU.anchorRt[secondMDIndex]; - float rt_OutLo = mdsInGPU.anchorRt[thirdMDIndex]; - - float z_InLo = mdsInGPU.anchorZ[firstMDIndex]; - float z_InOut = mdsInGPU.anchorZ[secondMDIndex]; - float z_OutLo = mdsInGPU.anchorZ[thirdMDIndex]; - - float alpha1GeV_OutLo = asinf(fminf(rt_OutLo * SDL::k2Rinv1GeVf / SDL::ptCut, SDL::sinAlphaMax)); - - float rtRatio_OutLoInLo = rt_OutLo / rt_InLo; // Outer segment beginning rt divided by inner segment beginning rt; - float dzDrtScale = tanf(alpha1GeV_OutLo) / alpha1GeV_OutLo; // The track can bend in r-z plane slightly - float zpitch_InLo = (isPS_InLo ? SDL::pixelPSZpitch : SDL::strip2SZpitch); - float zpitch_OutLo = (isPS_OutLo ? SDL::pixelPSZpitch : SDL::strip2SZpitch); - float zGeom = zpitch_InLo + zpitch_OutLo; - - zLo = z_InLo + (z_InLo - SDL::deltaZLum) * (rtRatio_OutLoInLo - 1.f) * (z_InLo > 0.f ? 
1.f : dzDrtScale) - zGeom; //slope-correction only on outer end - - // Cut #0: Preliminary (Only here in endcap case) - pass = pass and ((z_InLo * z_OutLo) > 0); - if(not pass) return pass; - - float dLum = copysignf(SDL::deltaZLum, z_InLo); - bool isOutSgInnerMDPS = modulesInGPU.moduleType[outerInnerLowerModuleIndex] == SDL::PS; - bool isInSgInnerMDPS = modulesInGPU.moduleType[innerInnerLowerModuleIndex] == SDL::PS; - - float rtGeom = (isInSgInnerMDPS and isOutSgInnerMDPS) ? 2.f * SDL::pixelPSZpitch : (isInSgInnerMDPS or isOutSgInnerMDPS) ? SDL::pixelPSZpitch + SDL::strip2SZpitch : 2.f * SDL::strip2SZpitch; - - float zGeom1 = copysignf(zGeom,z_InLo); - float dz = z_OutLo - z_InLo; - rtLo = rt_InLo * (1.f + dz / (z_InLo + dLum) / dzDrtScale) - rtGeom; //slope correction only on the lower end - - zOut = z_OutLo; - rtOut = rt_OutLo; - - //Cut #1: rt condition - - rtHi = rt_InLo * (1.f + dz / (z_InLo - dLum)) + rtGeom; - - pass = pass and ((rtOut >= rtLo) & (rtOut <= rtHi)); - if(not pass) return pass; - - bool isInSgOuterMDPS = modulesInGPU.moduleType[innerOuterLowerModuleIndex] == SDL::PS; - - float drOutIn = rtOut - rt_InLo; - const float drtSDIn = rt_InOut - rt_InLo; - const float dzSDIn = z_InOut - z_InLo; - const float dr3SDIn = sqrtf(rt_InOut * rt_InOut + z_InOut * z_InOut) - sqrtf(rt_InLo * rt_InLo + z_InLo * z_InLo); - float coshEta = dr3SDIn / drtSDIn; //direction estimate - float dzOutInAbs = fabsf(z_OutLo - z_InLo); - float multDzDr = dzOutInAbs * coshEta / (coshEta * coshEta - 1.f); - - kZ = (z_OutLo - z_InLo) / dzSDIn; - float sdlThetaMulsF = 0.015f * sqrtf(0.1f + 0.2f * (rt_OutLo - rt_InLo) / 50.f); - - float sdlMuls = sdlThetaMulsF * 3.f / SDL::ptCut * 4.f; //will need a better guess than x4? 
- - float drtErr = sqrtf(SDL::pixelPSZpitch * SDL::pixelPSZpitch * 2.f / (dzSDIn * dzSDIn) * (dzOutInAbs * dzOutInAbs) + sdlMuls * sdlMuls * multDzDr * multDzDr / 3.f * coshEta * coshEta); - - float drtMean = drtSDIn * dzOutInAbs/fabsf(dzSDIn); - float rtWindow = drtErr + rtGeom; - float rtLo_point = rt_InLo + drtMean / dzDrtScale - rtWindow; - float rtHi_point = rt_InLo + drtMean + rtWindow; - - // Cut #3: rt-z pointed - // https://github.com/slava77/cms-tkph2-ntuple/blob/superDoubletLinked-91X-noMock/doubletAnalysis.C#L3765 - - if (isInSgInnerMDPS and isInSgOuterMDPS) // If both PS then we can point - { - pass = pass and (kZ >= 0 and rtOut >= rtLo_point and rtOut <= rtHi_point); - if(not pass) return pass; - } - - float sdlPVoff = 0.1f/rtOut; - sdlCut = alpha1GeV_OutLo + sqrtf(sdlMuls * sdlMuls + sdlPVoff * sdlPVoff); - - deltaPhiPos = SDL::deltaPhi(mdsInGPU.anchorX[secondMDIndex], mdsInGPU.anchorY[secondMDIndex], mdsInGPU.anchorX[fourthMDIndex], mdsInGPU.anchorY[fourthMDIndex]); - - pass = pass and (fabsf(deltaPhiPos) <= sdlCut); - if(not pass) return pass; - - float midPointX = 0.5f*(mdsInGPU.anchorX[firstMDIndex] + mdsInGPU.anchorX[thirdMDIndex]); - float midPointY = 0.5f* (mdsInGPU.anchorY[firstMDIndex] + mdsInGPU.anchorY[thirdMDIndex]); - float midPointZ = 0.5f*(mdsInGPU.anchorZ[firstMDIndex] + mdsInGPU.anchorZ[thirdMDIndex]); - float diffX = mdsInGPU.anchorX[thirdMDIndex] - mdsInGPU.anchorX[firstMDIndex]; - float diffY = mdsInGPU.anchorY[thirdMDIndex] - mdsInGPU.anchorY[firstMDIndex]; - float diffZ = mdsInGPU.anchorZ[thirdMDIndex] - mdsInGPU.anchorZ[firstMDIndex]; - - dPhi = SDL::deltaPhi(midPointX, midPointY, diffX, diffY); - - // Cut #5: deltaPhiChange - pass = pass and ((fabsf(dPhi) <= sdlCut)); - if(not pass) return pass; - - float sdIn_alpha = __H2F(segmentsInGPU.dPhiChanges[innerSegmentIndex]); - float sdOut_alpha = sdIn_alpha; //weird - float sdOut_dPhiPos = SDL::deltaPhi(mdsInGPU.anchorX[thirdMDIndex], mdsInGPU.anchorY[thirdMDIndex], 
mdsInGPU.anchorX[fourthMDIndex], mdsInGPU.anchorY[fourthMDIndex]); - - float sdOut_dPhiChange = __H2F(segmentsInGPU.dPhiChanges[outerSegmentIndex]); - float sdOut_dPhiChange_min = __H2F(segmentsInGPU.dPhiChangeMins[outerSegmentIndex]); - float sdOut_dPhiChange_max = __H2F(segmentsInGPU.dPhiChangeMaxs[outerSegmentIndex]); - - float sdOut_alphaOutRHmin = SDL::phi_mpi_pi(sdOut_dPhiChange_min - sdOut_dPhiPos); - float sdOut_alphaOutRHmax = SDL::phi_mpi_pi(sdOut_dPhiChange_max - sdOut_dPhiPos); - float sdOut_alphaOut = SDL::phi_mpi_pi(sdOut_dPhiChange - sdOut_dPhiPos); - - float tl_axis_x = mdsInGPU.anchorX[fourthMDIndex] - mdsInGPU.anchorX[firstMDIndex]; - float tl_axis_y = mdsInGPU.anchorY[fourthMDIndex] - mdsInGPU.anchorY[firstMDIndex]; - float tl_axis_z = mdsInGPU.anchorZ[fourthMDIndex] - mdsInGPU.anchorZ[firstMDIndex]; - - betaIn = sdIn_alpha - SDL::deltaPhi(mdsInGPU.anchorX[firstMDIndex], mdsInGPU.anchorY[firstMDIndex], tl_axis_x, tl_axis_y); - - float sdIn_alphaRHmin = __H2F(segmentsInGPU.dPhiChangeMins[innerSegmentIndex]); - float sdIn_alphaRHmax = __H2F(segmentsInGPU.dPhiChangeMaxs[innerSegmentIndex]); - float betaInRHmin = betaIn + sdIn_alphaRHmin - sdIn_alpha; - float betaInRHmax = betaIn + sdIn_alphaRHmax - sdIn_alpha; - - betaOut = -sdOut_alphaOut + SDL::deltaPhi(mdsInGPU.anchorX[fourthMDIndex], mdsInGPU.anchorY[fourthMDIndex], tl_axis_x, tl_axis_y); - - float betaOutRHmin = betaOut - sdOut_alphaOutRHmin + sdOut_alphaOut; - float betaOutRHmax = betaOut - sdOut_alphaOutRHmax + sdOut_alphaOut; - - float swapTemp; - if(fabsf(betaOutRHmin) > fabsf(betaOutRHmax)) - { - swapTemp = betaOutRHmin; - betaOutRHmin = betaOutRHmax; - betaOutRHmax = swapTemp; - } - - if(fabsf(betaInRHmin) > fabsf(betaInRHmax)) - { - swapTemp = betaInRHmin; - betaInRHmin = betaInRHmax; - betaInRHmax = swapTemp; - } - float sdIn_dr = sqrtf((mdsInGPU.anchorX[secondMDIndex] - mdsInGPU.anchorX[firstMDIndex]) * (mdsInGPU.anchorX[secondMDIndex] - mdsInGPU.anchorX[firstMDIndex]) + 
(mdsInGPU.anchorY[secondMDIndex] - mdsInGPU.anchorY[firstMDIndex]) * (mdsInGPU.anchorY[secondMDIndex] - mdsInGPU.anchorY[firstMDIndex])); - float sdIn_d = rt_InOut - rt_InLo; - - float dr = sqrtf(tl_axis_x * tl_axis_x + tl_axis_y * tl_axis_y); - const float corrF = 1.f; - betaInCut = asinf(fminf((-sdIn_dr * corrF + dr) * SDL::k2Rinv1GeVf / SDL::ptCut, SDL::sinAlphaMax)) + (0.02f / sdIn_d); - - //Cut #6: first beta cut - pass = pass and (fabsf(betaInRHmin) < betaInCut); - if(not pass) return pass; - - float betaAv = 0.5f * (betaIn + betaOut); - pt_beta = dr * SDL::k2Rinv1GeVf / sinf(betaAv); - - - int lIn= 11; //endcap - int lOut = 13; //endcap - - float sdOut_dr = sqrtf((mdsInGPU.anchorX[fourthMDIndex] - mdsInGPU.anchorX[thirdMDIndex]) * (mdsInGPU.anchorX[fourthMDIndex] - mdsInGPU.anchorX[thirdMDIndex]) + (mdsInGPU.anchorY[fourthMDIndex] - mdsInGPU.anchorY[thirdMDIndex]) * (mdsInGPU.anchorY[fourthMDIndex] - mdsInGPU.anchorY[thirdMDIndex])); - float sdOut_d = mdsInGPU.anchorRt[fourthMDIndex] - mdsInGPU.anchorRt[thirdMDIndex]; - - float diffDr = fabsf(sdIn_dr - sdOut_dr)/fabs(sdIn_dr + sdOut_dr); - - SDL::runDeltaBetaIterationsT5(betaIn, betaOut, betaAv, pt_beta, sdIn_dr, sdOut_dr, dr, lIn); - - const float betaInMMSF = (fabsf(betaInRHmin + betaInRHmax) > 0) ? (2.f * betaIn / fabsf(betaInRHmin + betaInRHmax)) : 0.; //mean value of min,max is the old betaIn - const float betaOutMMSF = (fabsf(betaOutRHmin + betaOutRHmax) > 0) ? 
(2.f * betaOut / fabsf(betaOutRHmin + betaOutRHmax)) : 0.; - betaInRHmin *= betaInMMSF; - betaInRHmax *= betaInMMSF; - betaOutRHmin *= betaOutMMSF; - betaOutRHmax *= betaOutMMSF; - - const float dBetaMuls = sdlThetaMulsF * 4.f / fminf(fabsf(pt_beta), SDL::pt_betaMax); //need to confirm the range-out value of 7 GeV - - const float alphaInAbsReg = fmaxf(fabsf(sdIn_alpha), asinf(fminf(rt_InLo * SDL::k2Rinv1GeVf / 3.0f, SDL::sinAlphaMax))); - const float alphaOutAbsReg = fmaxf(fabsf(sdOut_alpha), asinf(fminf(rt_OutLo * SDL::k2Rinv1GeVf / 3.0f, SDL::sinAlphaMax))); - const float dBetaInLum = lIn < 11 ? 0.0f : fabsf(alphaInAbsReg*SDL::deltaZLum / z_InLo); - const float dBetaOutLum = lOut < 11 ? 0.0f : fabsf(alphaOutAbsReg*SDL::deltaZLum / z_OutLo); - const float dBetaLum2 = (dBetaInLum + dBetaOutLum) * (dBetaInLum + dBetaOutLum); - const float sinDPhi = sinf(dPhi); - - const float dBetaRIn2 = 0; // TODO-RH - // const float dBetaROut2 = 0; // TODO-RH - float dBetaROut2 = 0;//TODO-RH - betaOutCut = asinf(fminf(dr*SDL::k2Rinv1GeVf / SDL::ptCut, SDL::sinAlphaMax)) //FIXME: need faster version - + (0.02f / sdOut_d) + sqrtf(dBetaLum2 + dBetaMuls*dBetaMuls); - - //Cut #6: The real beta cut - pass = pass and (fabsf(betaOut) < betaOutCut); - if(not pass) return pass; - - float pt_betaIn = dr * SDL::k2Rinv1GeVf/sinf(betaIn); - float pt_betaOut = dr * SDL::k2Rinv1GeVf / sinf(betaOut); - float dBetaRes = 0.02f/fminf(sdOut_d,sdIn_d); - float dBetaCut2 = (dBetaRes*dBetaRes * 2.0f + dBetaMuls * dBetaMuls + dBetaLum2 + dBetaRIn2 + dBetaROut2 - + 0.25f * (fabsf(betaInRHmin - betaInRHmax) + fabsf(betaOutRHmin - betaOutRHmax)) * (fabsf(betaInRHmin - betaInRHmax) + fabsf(betaOutRHmin - betaOutRHmax))); - float dBeta = betaIn - betaOut; - //Cut #7: Cut on dBeta - deltaBetaCut = sqrtf(dBetaCut2); - - pass = pass and (dBeta * dBeta <= dBetaCut2); - - return pass; -} -ALPAKA_FN_ACC bool SDL::runQuintupletDefaultAlgo(struct SDL::modules& modulesInGPU, struct SDL::miniDoublets& mdsInGPU, struct 
SDL::segments& segmentsInGPU, uint16_t& innerInnerLowerModuleIndex, uint16_t& innerOuterLowerModuleIndex, uint16_t& outerInnerLowerModuleIndex, uint16_t& outerOuterLowerModuleIndex, unsigned int& innerSegmentIndex, unsigned int& outerSegmentIndex, unsigned int& firstMDIndex, unsigned int& secondMDIndex, unsigned int& thirdMDIndex, unsigned int& fourthMDIndex, float& zOut, float& rtOut, float& deltaPhiPos, float& deltaPhi, float& betaIn, float& - betaOut, float& pt_beta, float& zLo, float& zHi, float& rtLo, float& rtHi, float& zLoPointed, float& zHiPointed, float& sdlCut, float& betaInCut, float& betaOutCut, float& deltaBetaCut, float& kZ) -{ - - bool pass = false; - - zLo = -999; - zHi = -999; - rtLo = -999; - rtHi = -999; - zLoPointed = -999; - zHiPointed = -999; - kZ = -999; - betaInCut = -999; - - short innerInnerLowerModuleSubdet = modulesInGPU.subdets[innerInnerLowerModuleIndex]; - short innerOuterLowerModuleSubdet = modulesInGPU.subdets[innerOuterLowerModuleIndex]; - short outerInnerLowerModuleSubdet = modulesInGPU.subdets[outerInnerLowerModuleIndex]; - short outerOuterLowerModuleSubdet = modulesInGPU.subdets[outerOuterLowerModuleIndex]; - - if(innerInnerLowerModuleSubdet == SDL::Barrel - and innerOuterLowerModuleSubdet == SDL::Barrel - and outerInnerLowerModuleSubdet == SDL::Barrel - and outerOuterLowerModuleSubdet == SDL::Barrel) - { - return runQuintupletDefaultAlgoBBBB(modulesInGPU,mdsInGPU,segmentsInGPU,innerInnerLowerModuleIndex,innerOuterLowerModuleIndex,outerInnerLowerModuleIndex,outerOuterLowerModuleIndex,innerSegmentIndex,outerSegmentIndex,firstMDIndex, secondMDIndex, thirdMDIndex, fourthMDIndex, zOut,rtOut,deltaPhiPos,deltaPhi,betaIn,betaOut,pt_beta, zLo, zHi, zLoPointed, zHiPointed, sdlCut, betaInCut, betaOutCut, deltaBetaCut); - } - - else if(innerInnerLowerModuleSubdet == SDL::Barrel - and innerOuterLowerModuleSubdet == SDL::Barrel - and outerInnerLowerModuleSubdet == SDL::Endcap - and outerOuterLowerModuleSubdet == SDL::Endcap) - { - return 
runQuintupletDefaultAlgoBBEE(modulesInGPU,mdsInGPU,segmentsInGPU,innerInnerLowerModuleIndex,innerOuterLowerModuleIndex,outerInnerLowerModuleIndex,outerOuterLowerModuleIndex,innerSegmentIndex,outerSegmentIndex,firstMDIndex, secondMDIndex, thirdMDIndex, fourthMDIndex, zOut,rtOut,deltaPhiPos,deltaPhi,betaIn,betaOut,pt_beta, zLo, rtLo, rtHi, sdlCut, betaInCut, betaOutCut, deltaBetaCut, kZ); - } - - - else if(innerInnerLowerModuleSubdet == SDL::Barrel - and innerOuterLowerModuleSubdet == SDL::Barrel - and outerInnerLowerModuleSubdet == SDL::Barrel - and outerOuterLowerModuleSubdet == SDL::Endcap) - { - return runQuintupletDefaultAlgoBBBB(modulesInGPU,mdsInGPU,segmentsInGPU,innerInnerLowerModuleIndex,innerOuterLowerModuleIndex,outerInnerLowerModuleIndex,outerOuterLowerModuleIndex,innerSegmentIndex,outerSegmentIndex,firstMDIndex, secondMDIndex, thirdMDIndex, fourthMDIndex, zOut,rtOut,deltaPhiPos,deltaPhi,betaIn,betaOut,pt_beta,zLo, zHi, zLoPointed, zHiPointed, sdlCut, betaInCut, betaOutCut, deltaBetaCut); - - } - - else if(innerInnerLowerModuleSubdet == SDL::Barrel - and innerOuterLowerModuleSubdet == SDL::Endcap - and outerInnerLowerModuleSubdet == SDL::Endcap - and outerOuterLowerModuleSubdet == SDL::Endcap) - { - return runQuintupletDefaultAlgoBBEE(modulesInGPU,mdsInGPU,segmentsInGPU,innerInnerLowerModuleIndex,innerOuterLowerModuleIndex,outerInnerLowerModuleIndex,outerOuterLowerModuleIndex,innerSegmentIndex,outerSegmentIndex, firstMDIndex, secondMDIndex, thirdMDIndex, fourthMDIndex, zOut,rtOut,deltaPhiPos,deltaPhi,betaIn,betaOut,pt_beta, zLo, rtLo, rtHi, sdlCut, betaInCut, betaOutCut, deltaBetaCut, kZ); - - } - - else if(innerInnerLowerModuleSubdet == SDL::Endcap - and innerOuterLowerModuleSubdet == SDL::Endcap - and outerInnerLowerModuleSubdet == SDL::Endcap - and outerOuterLowerModuleSubdet == SDL::Endcap) - { - return 
runQuintupletDefaultAlgoEEEE(modulesInGPU,mdsInGPU,segmentsInGPU,innerInnerLowerModuleIndex,innerOuterLowerModuleIndex,outerInnerLowerModuleIndex,outerOuterLowerModuleIndex,innerSegmentIndex,outerSegmentIndex, firstMDIndex, secondMDIndex, thirdMDIndex, fourthMDIndex, zOut,rtOut,deltaPhiPos,deltaPhi,betaIn,betaOut,pt_beta, zLo, rtLo, rtHi, sdlCut, betaInCut, betaOutCut, deltaBetaCut, kZ); - } - - return pass; -} -ALPAKA_FN_ACC void SDL::runDeltaBetaIterationsT5(float& betaIn, float& betaOut, float& betaAv, float & pt_beta, float sdIn_dr, float sdOut_dr, float dr, float lIn) -{ - if (lIn == 0) - { - betaOut += copysign(asinf(fminf(sdOut_dr * SDL::k2Rinv1GeVf / fabsf(pt_beta), SDL::sinAlphaMax)), betaOut); - return; - } - - if (betaIn * betaOut > 0.f and (fabsf(pt_beta) < 4.f * SDL::pt_betaMax or (lIn >= 11 and fabsf(pt_beta) < 8.f * SDL::pt_betaMax))) //and the pt_beta is well-defined; less strict for endcap-endcap - { - - const float betaInUpd = betaIn + copysignf(asinf(fminf(sdIn_dr * SDL::k2Rinv1GeVf / fabsf(pt_beta), SDL::sinAlphaMax)), betaIn); //FIXME: need a faster version - const float betaOutUpd = betaOut + copysignf(asinf(fminf(sdOut_dr * SDL::k2Rinv1GeVf / fabsf(pt_beta), SDL::sinAlphaMax)), betaOut); //FIXME: need a faster version - betaAv = 0.5f * (betaInUpd + betaOutUpd); - - //1st update - //pt_beta = dr * k2Rinv1GeVf / sinf(betaAv); //get a better pt estimate - const float pt_beta_inv = 1.f/fabsf(dr * k2Rinv1GeVf / sinf(betaAv)); //get a better pt estimate - - betaIn += copysignf(asinf(fminf(sdIn_dr * SDL::k2Rinv1GeVf *pt_beta_inv, SDL::sinAlphaMax)), betaIn); //FIXME: need a faster version - betaOut += copysignf(asinf(fminf(sdOut_dr * SDL::k2Rinv1GeVf *pt_beta_inv, SDL::sinAlphaMax)), betaOut); //FIXME: need a faster version - //update the av and pt - betaAv = 0.5f * (betaIn + betaOut); - //2nd update - pt_beta = dr * SDL::k2Rinv1GeVf / sinf(betaAv); //get a better pt estimate - } - else if (lIn < 11 && fabsf(betaOut) < 0.2f * fabsf(betaIn) && 
fabsf(pt_beta) < 12.f * SDL::pt_betaMax) //use betaIn sign as ref - { - - const float pt_betaIn = dr * k2Rinv1GeVf / sinf(betaIn); - - const float betaInUpd = betaIn + copysignf(asinf(fminf(sdIn_dr * SDL::k2Rinv1GeVf / fabsf(pt_betaIn), SDL::sinAlphaMax)), betaIn); //FIXME: need a faster version - const float betaOutUpd = betaOut + copysignf(asinf(fminf(sdOut_dr * SDL::k2Rinv1GeVf / fabsf(pt_betaIn), SDL::sinAlphaMax)), betaIn); //FIXME: need a faster version - betaAv = (fabsf(betaOut) > 0.2f * fabsf(betaIn)) ? (0.5f * (betaInUpd + betaOutUpd)) : betaInUpd; - - //1st update - pt_beta = dr * SDL::k2Rinv1GeVf / sin(betaAv); //get a better pt estimate - betaIn += copysignf(asinf(fminf(sdIn_dr * SDL::k2Rinv1GeVf / fabsf(pt_beta), SDL::sinAlphaMax)), betaIn); //FIXME: need a faster version - betaOut += copysignf(asinf(fminf(sdOut_dr * SDL::k2Rinv1GeVf / fabsf(pt_beta), SDL::sinAlphaMax)), betaIn); //FIXME: need a faster version - //update the av and pt - betaAv = 0.5f * (betaIn + betaOut); - //2nd update - pt_beta = dr * SDL::k2Rinv1GeVf / sin(betaAv); //get a better pt estimate - - } -} +} \ No newline at end of file diff --git a/SDL/Quintuplet.cuh b/SDL/Quintuplet.cuh index 1ec309d5..c7ccbeb7 100644 --- a/SDL/Quintuplet.cuh +++ b/SDL/Quintuplet.cuh @@ -16,8 +16,8 @@ namespace SDL { unsigned int* tripletIndices; uint16_t* lowerModuleIndices; - unsigned int* nQuintuplets; // ? - unsigned int* totOccupancyQuintuplets; //? 
+ int* nQuintuplets; + int* totOccupancyQuintuplets; unsigned int* nMemoryLocations; FPX* innerRadius; @@ -49,64 +49,1926 @@ namespace SDL void createQuintupletsInExplicitMemory(struct SDL::quintuplets& quintupletsInGPU, const unsigned int& maxQuintuplets, const uint16_t& nLowerModules, const uint16_t& nEligibleModules,cudaStream_t stream); -// void createEligibleModulesListForQuintuplets(struct modules& modulesInGPU, struct triplets& tripletsInGPU, uint16_t& nEligibleModules, uint16_t* indicesOfEligibleModules, unsigned int& nTotalQuintuplets, unsigned int& maxTriplets,cudaStream_t stream, struct objectRanges& rangesInGPU); - __global__ void createEligibleModulesListForQuintupletsGPU(struct modules& modulesInGPU, struct triplets& tripletsInGPU, unsigned int* nTotalQuintuplets, cudaStream_t stream, struct objectRanges& rangesInGPU); + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool checkIntervalOverlap(const float& firstMin, const float& firstMax, const float& secondMin, const float& secondMax) + { + return ((firstMin <= secondMin) & (secondMin < firstMax)) | ((secondMin < firstMin) & (firstMin < secondMax)); + }; -// ALPAKA_FN_ACC void rmQuintupletToMemory(struct SDL::quintuplets& quintupletsInGPU, unsigned int quintupletIndex); + ALPAKA_FN_ACC ALPAKA_FN_INLINE void addQuintupletToMemory(struct SDL::triplets& tripletsInGPU, struct SDL::quintuplets& quintupletsInGPU, unsigned int innerTripletIndex, unsigned int outerTripletIndex, uint16_t& lowerModule1, uint16_t& lowerModule2, uint16_t& lowerModule3, uint16_t& lowerModule4, uint16_t& lowerModule5, float& innerRadius, float& bridgeRadius, float& outerRadius, float& regressionG, float& regressionF, float& regressionRadius, float& rzChiSquared, float& rPhiChiSquared, float& nonAnchorChiSquared, float pt, float eta, float phi, float scores, uint8_t layer, unsigned int quintupletIndex) + { + quintupletsInGPU.tripletIndices[2 * quintupletIndex] = innerTripletIndex; + quintupletsInGPU.tripletIndices[2 * quintupletIndex + 1] = 
outerTripletIndex; - ALPAKA_FN_ACC void addQuintupletToMemory(struct SDL::triplets& tripletsInGPU, struct SDL::quintuplets& quintupletsInGPU, unsigned int innerTripletIndex, unsigned int outerTripletIndex, uint16_t& lowerModule1, uint16_t& lowerModule2, uint16_t& lowerModule3, uint16_t& lowerModule4, uint16_t& lowerModule5, float& innerRadius, float& bridgeRadius, float& outerRadius, float& regressionG, float& regressionF, float& regressionRadius, float& rzChiSquared, float& rPhiChiSquared, float& nonAnchorChiSquared, float pt, float eta, float phi, float scores, uint8_t layer, unsigned int quintupletIndex); - - ALPAKA_FN_ACC bool runQuintupletDefaultAlgo(struct SDL::modules& modulesInGPU, struct SDL::miniDoublets& mdsInGPU, struct SDL::segments& segmentsInGPU, struct SDL::triplets& tripletsInGPU, uint16_t& lowerModuleIndex1, uint16_t& lowerModuleIndex2, uint16_t& lowerModuleIndex3, uint16_t& lowerModuleIndex4, uint16_t& lowerModuleIndex5, unsigned int& innerTripletIndex, unsigned int& outerTripletIndex, float& innerRadius, float& outerRadius, float& bridgeRadius, float& regressionG, float& regressionF, float& regressionRadius, float& rzChiSquared, float& chiSquared, float& nonAnchorChiSquared); + quintupletsInGPU.lowerModuleIndices[5 * quintupletIndex] = lowerModule1; + quintupletsInGPU.lowerModuleIndices[5 * quintupletIndex + 1] = lowerModule2; + quintupletsInGPU.lowerModuleIndices[5 * quintupletIndex + 2] = lowerModule3; + quintupletsInGPU.lowerModuleIndices[5 * quintupletIndex + 3] = lowerModule4; + quintupletsInGPU.lowerModuleIndices[5 * quintupletIndex + 4] = lowerModule5; + quintupletsInGPU.innerRadius[quintupletIndex] = __F2H(innerRadius); + quintupletsInGPU.outerRadius[quintupletIndex] = __F2H(outerRadius); + quintupletsInGPU.pt[quintupletIndex] = __F2H(pt); + quintupletsInGPU.eta[quintupletIndex] = __F2H(eta); + quintupletsInGPU.phi[quintupletIndex] = __F2H(phi); + quintupletsInGPU.score_rphisum[quintupletIndex] = __F2H(scores); + 
quintupletsInGPU.layer[quintupletIndex] = layer; + quintupletsInGPU.isDup[quintupletIndex] = false; + quintupletsInGPU.regressionRadius[quintupletIndex] = regressionRadius; + quintupletsInGPU.regressionG[quintupletIndex] = regressionG; + quintupletsInGPU.regressionF[quintupletIndex] = regressionF; + quintupletsInGPU.logicalLayers[5 * quintupletIndex] = tripletsInGPU.logicalLayers[3 * innerTripletIndex]; + quintupletsInGPU.logicalLayers[5 * quintupletIndex + 1] = tripletsInGPU.logicalLayers[3 * innerTripletIndex + 1]; + quintupletsInGPU.logicalLayers[5 * quintupletIndex + 2] = tripletsInGPU.logicalLayers[3 * innerTripletIndex + 2]; + quintupletsInGPU.logicalLayers[5 * quintupletIndex + 3] = tripletsInGPU.logicalLayers[3 * outerTripletIndex + 1]; + quintupletsInGPU.logicalLayers[5 * quintupletIndex + 4] = tripletsInGPU.logicalLayers[3 * outerTripletIndex + 2]; + quintupletsInGPU.hitIndices[10 * quintupletIndex] = tripletsInGPU.hitIndices[6 * innerTripletIndex]; + quintupletsInGPU.hitIndices[10 * quintupletIndex + 1] = tripletsInGPU.hitIndices[6 * innerTripletIndex + 1]; + quintupletsInGPU.hitIndices[10 * quintupletIndex + 2] = tripletsInGPU.hitIndices[6 * innerTripletIndex + 2]; + quintupletsInGPU.hitIndices[10 * quintupletIndex + 3] = tripletsInGPU.hitIndices[6 * innerTripletIndex + 3]; + quintupletsInGPU.hitIndices[10 * quintupletIndex + 4] = tripletsInGPU.hitIndices[6 * innerTripletIndex + 4]; + quintupletsInGPU.hitIndices[10 * quintupletIndex + 5] = tripletsInGPU.hitIndices[6 * innerTripletIndex + 5]; + quintupletsInGPU.hitIndices[10 * quintupletIndex + 6] = tripletsInGPU.hitIndices[6 * outerTripletIndex + 2]; + quintupletsInGPU.hitIndices[10 * quintupletIndex + 7] = tripletsInGPU.hitIndices[6 * outerTripletIndex + 3]; + quintupletsInGPU.hitIndices[10 * quintupletIndex + 8] = tripletsInGPU.hitIndices[6 * outerTripletIndex + 4]; + quintupletsInGPU.hitIndices[10 * quintupletIndex + 9] = tripletsInGPU.hitIndices[6 * outerTripletIndex + 5]; + 
quintupletsInGPU.bridgeRadius[quintupletIndex] = bridgeRadius; + quintupletsInGPU.rzChiSquared[quintupletIndex] = rzChiSquared; + quintupletsInGPU.chiSquared[quintupletIndex] = rPhiChiSquared; + quintupletsInGPU.nonAnchorChiSquared[quintupletIndex] = nonAnchorChiSquared; + }; - ALPAKA_FN_ACC bool passT5RZConstraint(struct SDL::modules& modulesInGPU, struct SDL::miniDoublets& mdsInGPU, unsigned int firstMDIndex, unsigned int secondMDIndex, unsigned int thirdMDIndex, unsigned int fourthMDIndex, unsigned int fifthMDIndex, uint16_t& lowerModuleIndex1, uint16_t& lowerModuleIndex2, uint16_t& lowerModuleIndex3, uint16_t& lowerModuleIndex4, uint16_t& lowerModuleIndex5); + //90% constraint + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passChiSquaredConstraint(struct SDL::modules& modulesInGPU, uint16_t& lowerModuleIndex1, uint16_t& lowerModuleIndex2, uint16_t& lowerModuleIndex3, uint16_t& lowerModuleIndex4, uint16_t& lowerModuleIndex5, float& chiSquared) + { + //following Philip's layer number prescription + const int layer1 = modulesInGPU.layers[lowerModuleIndex1] + 6 * (modulesInGPU.subdets[lowerModuleIndex1] == SDL::Endcap) + 5 * (modulesInGPU.subdets[lowerModuleIndex1] == SDL::Endcap and modulesInGPU.moduleType[lowerModuleIndex1] == SDL::TwoS); + const int layer2 = modulesInGPU.layers[lowerModuleIndex2] + 6 * (modulesInGPU.subdets[lowerModuleIndex2] == SDL::Endcap) + 5 * (modulesInGPU.subdets[lowerModuleIndex2] == SDL::Endcap and modulesInGPU.moduleType[lowerModuleIndex2] == SDL::TwoS); + const int layer3 = modulesInGPU.layers[lowerModuleIndex3] + 6 * (modulesInGPU.subdets[lowerModuleIndex3] == SDL::Endcap) + 5 * (modulesInGPU.subdets[lowerModuleIndex3] == SDL::Endcap and modulesInGPU.moduleType[lowerModuleIndex3] == SDL::TwoS); + const int layer4 = modulesInGPU.layers[lowerModuleIndex4] + 6 * (modulesInGPU.subdets[lowerModuleIndex4] == SDL::Endcap) + 5 * (modulesInGPU.subdets[lowerModuleIndex4] == SDL::Endcap and modulesInGPU.moduleType[lowerModuleIndex4] == SDL::TwoS); + 
const int layer5 = modulesInGPU.layers[lowerModuleIndex5] + 6 * (modulesInGPU.subdets[lowerModuleIndex5] == SDL::Endcap) + 5 * (modulesInGPU.subdets[lowerModuleIndex5] == SDL::Endcap and modulesInGPU.moduleType[lowerModuleIndex5] == SDL::TwoS); - ALPAKA_FN_ACC bool T5HasCommonMiniDoublet(struct triplets& tripletsInGPU, struct segments& segmentsInGPU, unsigned int innerTripletIndex, unsigned int outerTripletIndex); + if(layer1 == 7 and layer2 == 8 and layer3 == 9) + { + if(layer4 == 10 and layer5 == 11) + { + return chiSquared < 0.01788f; + } + else if(layer4 == 10 and layer5 == 16) + { + return chiSquared < 0.04725f; + } + else if(layer4 == 15 and layer5 == 16) + { + return chiSquared < 0.04725f; + } + } + else if(layer1 == 1 and layer2 == 7 and layer3 == 8) + { + if(layer4 == 9 and layer5 == 10) + { + return chiSquared < 0.01788f; + } + else if(layer4 == 9 and layer5 == 15) + { + return chiSquared < 0.08234f; + } + } + else if(layer1 == 1 and layer2 == 2 and layer3 == 7) + { + if(layer4 == 8 and layer5 == 9) + { + return chiSquared < 0.02360f; + } + else if(layer4 == 8 and layer5 == 14) + { + return chiSquared < 0.07167f; + } + else if(layer4 == 13 and layer5 == 14) + { + return chiSquared < 0.08234f; + } + } + else if(layer1 == 1 and layer2 == 2 and layer3 == 3) + { + if(layer4 == 7 and layer5 == 8) + { + return chiSquared < 0.01026f; + } + else if(layer4 == 7 and layer5 == 13) + { + return chiSquared < 0.06238f; + } + else if(layer4 == 12 and layer5 == 13) + { + return chiSquared < 0.06238f; + } + } + else if(layer1 == 1 and layer2 == 2 and layer3 == 3 and layer4 == 4) + { + if(layer5 == 12) + { + return chiSquared < 0.09461f; + } + else if(layer5 == 5) + { + return chiSquared < 0.04725f; + } + } + else if(layer1 == 2 and layer2 == 7 and layer3 == 8) + { + if(layer4 == 9 and layer5 == 10) + { + return chiSquared < 0.00512f; + } + if(layer4 == 9 and layer5 == 15) + { + return chiSquared < 0.04112f; + } + else if(layer4 == 14 and layer5 == 15) + { + return 
chiSquared < 0.06238f; + } + } + else if(layer1 == 2 and layer2 == 3 and layer3 == 7) + { + if(layer4 == 8 and layer5 == 14) + { + return chiSquared < 0.07167f; + } + else if(layer4 == 13 and layer5 == 14) + { + return chiSquared < 0.06238f; + } + } + else if(layer1 == 2 and layer2 == 3 and layer3 == 4) + { + if(layer4 == 12 and layer5 == 13) + { + return chiSquared < 0.10870f; + } + else if(layer4 == 5 and layer5 == 12) + { + return chiSquared < 0.10870f; + } + else if(layer4 == 5 and layer5 == 6) + { + return chiSquared < 0.08234f; + } + } + else if(layer1 == 3 and layer2 == 7 and layer3 == 8 and layer4 == 14 and layer5 == 15) + { + return chiSquared < 0.09461f; + } + else if(layer1 == 3 and layer2 == 4 and layer3 == 5 and layer4 == 12 and layer5 == 13) + { + return chiSquared < 0.09461f; + } - ALPAKA_FN_ACC float computeRadiusFromThreeAnchorHits(float x1, float y1, float x2, float y2, float x3, float y3, float& g, float& f); + return true; + }; - ALPAKA_FN_ACC void computeErrorInRadius(float* x1Vec, float* y1Vec, float* x2Vec, float* y2Vec, float* x3Vec, float* y3Vec, float& gError, float& fError); + //bounds can be found at http://uaf-10.t2.ucsd.edu/~bsathian/SDL/T5_RZFix/t5_rz_thresholds.txt + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passT5RZConstraint(TAcc const & acc, struct SDL::modules& modulesInGPU, struct SDL::miniDoublets& mdsInGPU, unsigned int firstMDIndex, unsigned int secondMDIndex, unsigned int thirdMDIndex, unsigned int fourthMDIndex, unsigned int fifthMDIndex, uint16_t& lowerModuleIndex1, uint16_t& lowerModuleIndex2, uint16_t& lowerModuleIndex3, uint16_t& lowerModuleIndex4, uint16_t& lowerModuleIndex5) + { + const float& rt1 = mdsInGPU.anchorRt[firstMDIndex]; + const float& rt2 = mdsInGPU.anchorRt[secondMDIndex]; + const float& rt3 = mdsInGPU.anchorRt[thirdMDIndex]; + const float& rt4 = mdsInGPU.anchorRt[fourthMDIndex]; + const float& rt5 = mdsInGPU.anchorRt[fifthMDIndex]; - ALPAKA_FN_ACC bool matchRadiiBBBBB(const float& innerRadius, const 
float& bridgeRadius, const float& outerRadius, float& innerRadiusMin, float& innerRadiusMax, float& bridgeRadiusMin, float& bridgeRadiusMax, float& outerRadiusMin, float& outerRadiusMax); + const float& z1 = mdsInGPU.anchorZ[firstMDIndex]; + const float& z2 = mdsInGPU.anchorZ[secondMDIndex]; + const float& z3 = mdsInGPU.anchorZ[thirdMDIndex]; + const float& z4 = mdsInGPU.anchorZ[fourthMDIndex]; + const float& z5 = mdsInGPU.anchorZ[fifthMDIndex]; - ALPAKA_FN_ACC bool matchRadiiBBBBE(const float& innerRadius, const float& bridgeRadius, const float& outerRadius, const float& innerRadiusMin2S, const float& innerRadiusMax2S, const float& bridgeRadiusMin2S, const float& bridgeRadiusMax2S, const float& outerRadiusMin2S, const float& outerRadiusMax2S, float& innerRadiusMin, float& innerRadiusMax, float& bridgeRadiusMin, float& bridgeRadiusMax, float& outerRadiusMin, float& outerRadiusMax); + //following Philip's layer number prescription + const int layer1 = modulesInGPU.layers[lowerModuleIndex1] + 6 * (modulesInGPU.subdets[lowerModuleIndex1] == SDL::Endcap) + 5 * (modulesInGPU.subdets[lowerModuleIndex1] == SDL::Endcap and modulesInGPU.moduleType[lowerModuleIndex1] == SDL::TwoS); + const int layer2 = modulesInGPU.layers[lowerModuleIndex2] + 6 * (modulesInGPU.subdets[lowerModuleIndex2] == SDL::Endcap) + 5 * (modulesInGPU.subdets[lowerModuleIndex2] == SDL::Endcap and modulesInGPU.moduleType[lowerModuleIndex2] == SDL::TwoS); + const int layer3 = modulesInGPU.layers[lowerModuleIndex3] + 6 * (modulesInGPU.subdets[lowerModuleIndex3] == SDL::Endcap) + 5 * (modulesInGPU.subdets[lowerModuleIndex3] == SDL::Endcap and modulesInGPU.moduleType[lowerModuleIndex3] == SDL::TwoS); + const int layer4 = modulesInGPU.layers[lowerModuleIndex4] + 6 * (modulesInGPU.subdets[lowerModuleIndex4] == SDL::Endcap) + 5 * (modulesInGPU.subdets[lowerModuleIndex4] == SDL::Endcap and modulesInGPU.moduleType[lowerModuleIndex4] == SDL::TwoS); + const int layer5 = modulesInGPU.layers[lowerModuleIndex5] + 6 * 
(modulesInGPU.subdets[lowerModuleIndex5] == SDL::Endcap) + 5 * (modulesInGPU.subdets[lowerModuleIndex5] == SDL::Endcap and modulesInGPU.moduleType[lowerModuleIndex5] == SDL::TwoS); - ALPAKA_FN_ACC bool matchRadiiBBBEE(const float& innerRadius, const float& bridgeRadius, const float& outerRadius, const float& innerRadiusMin2S, const float& innerRadiusMax2S, const float& bridgeRadiusMin2S, const float& bridgeRadiusMax2S, const float& outerRadiusMin2S, const float& outerRadiusMax2S,float& innerRadiusMin, float& innerRadiusMax, float& bridgeRadiusMin, float& bridgeRadiusMax, float& outerRadiusMin, float& outerRadiusMax); + //slope computed using the internal T3s + const int moduleLayer1 = modulesInGPU.moduleType[lowerModuleIndex1]; + const int moduleLayer2 = modulesInGPU.moduleType[lowerModuleIndex2]; + const int moduleLayer3 = modulesInGPU.moduleType[lowerModuleIndex3]; + const int moduleLayer4 = modulesInGPU.moduleType[lowerModuleIndex4]; + const int moduleLayer5 = modulesInGPU.moduleType[lowerModuleIndex5]; - ALPAKA_FN_ACC bool matchRadiiBBBEE12378(const float& innerRadius, const float& bridgeRadius, const float& outerRadius, const float& innerRadiusMin2S, const float& innerRadiusMax2S, const float& bridgeRadiusMin2S, const float& bridgeRadiusMax2S, const float& outerRadiusMin2S, const float& outerRadiusMax2S,float& innerRadiusMin, float& innerRadiusMax, float& bridgeRadiusMin, float& bridgeRadiusMax, float& outerRadiusMin, float& outerRadiusMax); + float slope; + if(moduleLayer1 == 0 and moduleLayer2 == 0 and moduleLayer3 == 1) //PSPS2S + { + slope = (z2 -z1)/(rt2 - rt1); + } + else + { + slope = (z3 - z1)/(rt3 - rt1); + } + float residual4 = (layer4 <= 6)? ((z4 - z1) - slope * (rt4 - rt1)) : ((rt4 - rt1) - (z4 - z1)/slope); + float residual5 = (layer4 <= 6) ? 
((z5 - z1) - slope * (rt5 - rt1)) : ((rt5 - rt1) - (z5 - z1)/slope); - ALPAKA_FN_ACC bool matchRadiiBBBEE23478(const float& innerRadius, const float& bridgeRadius, const float& outerRadius, const float& innerRadiusMin2S, const float& innerRadiusMax2S, const float& bridgeRadiusMin2S, const float& bridgeRadiusMax2S, const float& outerRadiusMin2S, const float& outerRadiusMax2S,float& innerRadiusMin, float& innerRadiusMax, float& bridgeRadiusMin, float& bridgeRadiusMax, float& outerRadiusMin, float& outerRadiusMax); + // creating a chi squared type quantity + // 0-> PS, 1->2S + residual4 = (moduleLayer4 == 0) ? residual4/2.4f : residual4/5.0f; + residual5 = (moduleLayer5 == 0) ? residual5/2.4f : residual5/5.0f; - ALPAKA_FN_ACC bool matchRadiiBBBEE34578(const float& innerRadius, const float& bridgeRadius, const float& outerRadius, const float& innerRadiusMin2S, const float& innerRadiusMax2S, const float& bridgeRadiusMin2S, const float& bridgeRadiusMax2S, const float& outerRadiusMin2S, const float& outerRadiusMax2S,float& innerRadiusMin, float& innerRadiusMax, float& bridgeRadiusMin, float& bridgeRadiusMax, float& outerRadiusMin, float& outerRadiusMax); + const float RMSE = alpaka::math::sqrt(acc, 0.5 * (residual4 * residual4 + residual5 * residual5)); - ALPAKA_FN_ACC bool matchRadiiBBEEE(const float& innerRadius, const float& bridgeRadius, const float& outerRadius, const float& innerRadiusMin2S, const float& innerRadiusMax2S, const float& bridgeRadiusMin2S, const float& bridgeRadiusMax2S, const float& outerRadiusMin2S, const float& outerRadiusMax2S,float& innerRadiusMin, float& innerRadiusMax, float& bridgeRadiusMin, float& bridgeRadiusMax, float& outerRadiusMin, float& outerRadiusMax); + //categories! 
+ if(layer1 == 1 and layer2 == 2 and layer3 == 3) + { + if(layer4 == 4 and layer5 == 5) + { + return RMSE < 0.545f; + } + else if(layer4 == 4 and layer5 == 12) + { + return RMSE < 1.105f; + } + else if(layer4 == 7 and layer5 == 13) + { + return RMSE < 0.775f; + } + else if(layer4 == 12 and layer5 == 13) + { + return RMSE < 0.625f; + } + } + else if(layer1 == 1 and layer2 == 2 and layer3 == 7) + { + if(layer4 == 8 and layer5 == 14) + { + return RMSE < 0.835f; + } + else if(layer4 == 13 and layer5 == 14) + { + return RMSE < 0.575f; + } + } + else if(layer1 == 1 and layer2 == 7 and layer3 == 8 and layer4 == 9 and layer5 == 15) + { + return RMSE < 0.825f; + } + else if(layer1 == 2 and layer2 == 3 and layer3 == 4) + { + if(layer4 == 5 and layer5 == 6) + { + return RMSE < 0.845f; + } + else if(layer4 == 5 and layer5 == 12) + { + return RMSE < 1.365f; + } - ALPAKA_FN_ACC bool matchRadiiBEEEE(const float& innerRadius, const float& bridgeRadius, const float& outerRadius, const float& innerRadiusMin2S, const float& innerRadiusMax2S, const float& bridgeRadiusMin2S, const float& bridgeRadiusMax2S, const float& outerRadiusMin2S, const float& outerRadiusMax2S,float& innerRadiusMin, float& innerRadiusMax, float& bridgeRadiusMin, float& bridgeRadiusMax, float& outerRadiusMin, float& outerRadiusMax); + else if(layer4 == 12 and layer5 == 13) + { + return RMSE < 0.675f; + } + } + else if(layer1 == 2 and layer2 == 3 and layer3 == 7 and layer4 == 13 and layer5 == 14) + { + return RMSE < 0.495f; + } + else if(layer1 == 2 and layer2 == 3 and layer3 == 12 and layer4 == 13 and layer5 == 14) + { + return RMSE < 0.695f; + } + else if(layer1 == 2 and layer2 == 7 and layer3 == 8) + { + if(layer4 == 9 and layer5 == 15) + { + return RMSE < 0.735f; + } + else if(layer4 == 14 and layer5 == 15) + { + return RMSE < 0.525f; + } + } + else if(layer1 == 2 and layer2 == 7 and layer3 == 13 and layer4 == 14 and layer5 == 15) + { + return RMSE < 0.665f; + } + else if(layer1 == 3 and layer2 == 4 and layer3 
== 5 and layer4 == 12 and layer5 == 13) + { + return RMSE < 0.995f; + } + else if(layer1 == 3 and layer2 == 4 and layer3 == 12 and layer4 == 13 and layer5 == 14) + { + return RMSE < 0.525f; + } + else if(layer1 == 3 and layer2 == 7 and layer3 == 8 and layer4 == 14 and layer5 == 15) + { + return RMSE < 0.525f; + } + else if(layer1 == 3 and layer2 == 7 and layer3 == 13 and layer4 == 14 and layer5 == 15) + { + return RMSE < 0.745f; + } + else if(layer1 == 3 and layer2 == 12 and layer3 == 13 and layer4 == 14 and layer5 == 15) + { + return RMSE < 0.555f; + } + else if(layer1 == 7 and layer2 == 8 and layer3 == 9 and layer4 == 15 and layer5 == 16) + { + return RMSE < 0.525f; + } + else if(layer1 == 7 and layer2 == 8 and layer3 == 14 and layer4 == 15 and layer5 == 16) + { + return RMSE < 0.885f; + } + else if(layer1 == 7 and layer2 == 13 and layer3 == 14 and layer4 == 15 and layer5 == 16) + { + return RMSE < 0.845f; + } - ALPAKA_FN_ACC bool matchRadiiEEEEE(const float& innerRadius, const float& bridgeRadius, const float& outerRadius, const float& innerRadiusMin2S, const float& innerRadiusMax2S, const float& bridgeRadiusMin2S, const float& bridgeRadiusMax2S, const float& outerRadiusMin2S, const float& outerRadiusMax2S,float& innerRadiusMin, float& innerRadiusMax, float& bridgeRadiusMin, float& bridgeRadiusMax, float& outerRadiusMin, float& outerRadiusMax); + return true; + }; - ALPAKA_FN_ACC bool checkIntervalOverlap(const float& firstMin, const float& firstMax, const float& secondMin, const float& secondMax); + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool T5HasCommonMiniDoublet(struct SDL::triplets& tripletsInGPU, struct SDL::segments& segmentsInGPU, unsigned int innerTripletIndex, unsigned int outerTripletIndex) + { + unsigned int innerOuterSegmentIndex = tripletsInGPU.segmentIndices[2 * innerTripletIndex + 1]; + unsigned int outerInnerSegmentIndex = tripletsInGPU.segmentIndices[2 * outerTripletIndex]; + unsigned int innerOuterOuterMiniDoubletIndex = segmentsInGPU.mdIndices[2 * 
innerOuterSegmentIndex + 1]; //inner triplet outer segment outer MD index + unsigned int outerInnerInnerMiniDoubletIndex = segmentsInGPU.mdIndices[2 * outerInnerSegmentIndex]; //outer triplet inner segmnet inner MD index - ALPAKA_FN_ACC float computeRadiusUsingRegression(int nPoints, float* xs, float* ys, float* delta1, float* delta2, float* slopes, bool* isFlat, float& g, float& f, float* sigmas, float& chiSquared); + return (innerOuterOuterMiniDoubletIndex == outerInnerInnerMiniDoubletIndex); + }; - ALPAKA_FN_ACC void computeSigmasForRegression(SDL::modules& modulesInGPU, const uint16_t* lowerModuleIndices, float* delta1, float* delta2, float* slopes, bool* isFlat, int nPoints = 5, bool anchorHits = true); + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE float computeRadiusFromThreeAnchorHits(TAcc const & acc, float x1, float y1, float x2, float y2, float x3, float y3, float& g, float& f) + { + float radius = 0.f; - ALPAKA_FN_ACC bool passChiSquaredConstraint(struct modules& modulesInGPU, uint16_t& lowerModuleIndex1, uint16_t& lowerModuleIndex2, uint16_t& lowerModuleIndex3, uint16_t& lowerModuleIndex4, uint16_t& lowerModuleIndex5, float& chiSquared); + //writing manual code for computing radius, which obviously sucks + //TODO:Use fancy inbuilt libraries like cuBLAS or cuSOLVE for this! 
+ //(g,f) -> center + //first anchor hit - (x1,y1), second anchor hit - (x2,y2), third anchor hit - (x3, y3) - ALPAKA_FN_ACC float computeChiSquared(int nPoints, float* xs, float* ys, float* delta1, float* delta2, float* slopes, bool* isFlat, float g, float f, float radius); + float denomInv = 1.0f/((y1 - y3) * (x2 - x3) - (x1 - x3) * (y2 - y3)); - ALPAKA_FN_ACC void runDeltaBetaIterationsT5(float& betaIn, float& betaOut, float& betaAv, float & pt_beta, float sdIn_dr, float sdOut_dr, float dr, float lIn); + float xy1sqr = x1 * x1 + y1 * y1; - ALPAKA_FN_ACC bool runQuintupletDefaultAlgoBBBB(struct SDL::modules& modulesInGPU, struct SDL::miniDoublets& mdsInGPU, struct SDL::segments& segmentsInGPU, uint16_t& innerInnerLowerModuleIndex, uint16_t& innerOuterLowerModuleIndex, uint16_t& outerInnerLowerModuleIndex, uint16_t& outerOuterLowerModuleIndex, unsigned int& innerSegmentIndex, unsigned int& outerSegmentIndex, unsigned int& firstMDIndex, unsigned int& secondMDIndex, unsigned int& thirdMDIndex, unsigned int& fourthMDIndex, float& zOut, float& rtOut, float& deltaPhiPos, float& dPhi, float& betaIn, float& betaOut, float& pt_beta, float& zLo, float& zHi, float& zLoPointed, float& zHiPointed, float& sdlCut, float& betaInCut, float& betaOutCut, float& deltaBetaCut); + float xy2sqr = x2 * x2 + y2 * y2; - ALPAKA_FN_ACC bool runQuintupletDefaultAlgoBBEE(struct SDL::modules& modulesInGPU, struct SDL::miniDoublets& mdsInGPU, struct SDL::segments& segmentsInGPU, uint16_t& innerInnerLowerModuleIndex, uint16_t& innerOuterLowerModuleIndex, uint16_t& outerInnerLowerModuleIndex, uint16_t& outerOuterLowerModuleIndex, unsigned int& innerSegmentIndex, unsigned int& outerSegmentIndex, unsigned int& firstMDIndex, unsigned int& secondMDIndex, unsigned int& thirdMDIndex, unsigned int& fourthMDIndex, float& zOut, float& rtOut, float& deltaPhiPos, float& dPhi, float& betaIn, float& betaOut, float& pt_beta, float& zLo, float& rtLo, float& rtHi, float& sdlCut, float& betaInCut, float& 
betaOutCut, float& deltaBetaCut, float& kZ); + float xy3sqr = x3 * x3 + y3 * y3; - ALPAKA_FN_ACC bool runQuintupletDefaultAlgoEEEE(struct SDL::modules& modulesInGPU, struct SDL::miniDoublets& mdsInGPU, struct SDL::segments& segmentsInGPU, uint16_t& innerInnerLowerModuleIndex, uint16_t& innerOuterLowerModuleIndex, uint16_t& outerInnerLowerModuleIndex, uint16_t& outerOuterLowerModuleIndex, unsigned int& innerSegmentIndex, unsigned int& outerSegmentIndex, unsigned int& firstMDIndex, unsigned int& secondMDIndex, unsigned int& thirdMDIndex, unsigned int& fourthMDIndex, float& zOut, float& rtOut, float& deltaPhiPos, float& dPhi, float& betaIn, float& betaOut, float& pt_beta, float& zLo, float& rtLo, float& rtHi, float& sdlCut, float& betaInCut, float& betaOutCut, float& deltaBetaCut, float& kZ); + g = 0.5f * ((y3 - y2) * xy1sqr + (y1 - y3) * xy2sqr + (y2 - y1) * xy3sqr) * denomInv; - ALPAKA_FN_ACC bool runQuintupletDefaultAlgo(struct SDL::modules& modulesInGPU, struct SDL::miniDoublets& mdsInGPU, struct SDL::segments& segmentsInGPU, uint16_t& innerInnerLowerModuleIndex, uint16_t& innerOuterLowerModuleIndex, uint16_t& outerInnerLowerModuleIndex, uint16_t& outerOuterLowerModuleIndex, unsigned int& innerSegmentIndex, unsigned int& outerSegmentIndex, unsigned int& firstMDIndex, unsigned int& secondMDIndex, unsigned int& thirdMDIndex, unsigned int& fourthMDIndex, float& zOut, float& rtOut, float& deltaPhiPos, float& deltaPhi, float& betaIn, float& betaOut, float& pt_beta, float& zLo, float& zHi, float& rtLo, float& rtHi, float& zLoPointed, float& zHiPointed, float& sdlCut, float& betaInCut, float& betaOutCut, float& deltaBetaCut, float& kZ); + f = 0.5f * ((x2 - x3) * xy1sqr + (x3 - x1) * xy2sqr + (x1 - x2) * xy3sqr) * denomInv; + float c = ((x2 * y3 - x3 * y2) * xy1sqr + (x3 * y1 - x1 * y3) * xy2sqr + (x1 * y2 - x2 * y1) * xy3sqr) * denomInv; - __global__ void createQuintupletsInGPUv2(struct SDL::modules& modulesInGPU, struct SDL::miniDoublets& mdsInGPU, struct SDL::segments& 
segmentsInGPU, struct SDL::triplets& tripletsInGPU, struct SDL::quintuplets& quintupletsInGPU, struct SDL::objectRanges& rangesInGPU,uint16_t nEligibleT5Modules); + if(((y1 - y3) * (x2 - x3) - (x1 - x3) * (y2 - y3) == 0) || (g * g + f * f - c < 0)) + { + printf("three collinear points or FATAL! r^2 < 0!\n"); + radius = -1.f; + } + else + radius = alpaka::math::sqrt(acc, g * g + f * f - c); -} + return radius; + }; + + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE void computeErrorInRadius(TAcc const & acc, float* x1Vec, float* y1Vec, float* x2Vec, float* y2Vec, float* x3Vec, float* y3Vec, float& minimumRadius, float& maximumRadius) + { + //brute force + float candidateRadius; + float g, f; + minimumRadius = SDL::SDL_INF; + maximumRadius = 0.f; + for(size_t i = 0; i < 3; i++) + { + float x1 = x1Vec[i]; + float y1 = y1Vec[i]; + for(size_t j = 0; j < 3; j++) + { + float x2 = x2Vec[j]; + float y2 = y2Vec[j]; + for(size_t k = 0; k < 3; k++) + { + float x3 = x3Vec[k]; + float y3 = y3Vec[k]; + candidateRadius = computeRadiusFromThreeAnchorHits(acc, x1, y1, x2, y2, x3, y3, g, f); + maximumRadius = alpaka::math::max(acc, candidateRadius, maximumRadius); + minimumRadius = alpaka::math::min(acc, candidateRadius, minimumRadius); + } + } + } + }; + + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool matchRadiiBBBEE12378(TAcc const & acc, const float& innerRadius, const float& bridgeRadius, const float& outerRadius, const float& innerRadiusMin2S, const float& innerRadiusMax2S, const float& bridgeRadiusMin2S, const float& bridgeRadiusMax2S, const float& outerRadiusMin2S, const float& outerRadiusMax2S, float& innerInvRadiusMin, float& innerInvRadiusMax, float& bridgeInvRadiusMin, float& bridgeInvRadiusMax, float& outerInvRadiusMin, float& outerInvRadiusMax) + { + float innerInvRadiusErrorBound = 0.178f; + float bridgeInvRadiusErrorBound = 0.507f; + float outerInvRadiusErrorBound = 7.655f; + + innerInvRadiusMax = (1.f + innerInvRadiusErrorBound) / innerRadius; + innerInvRadiusMin = 
alpaka::math::max(acc, 0.f, (1.f - innerInvRadiusErrorBound) / innerRadius); + + bridgeInvRadiusMax = (1.f + bridgeInvRadiusErrorBound) / bridgeRadius; + bridgeInvRadiusMin = alpaka::math::max(acc, 0.f, (1.f - bridgeInvRadiusErrorBound) / bridgeRadius); + + outerInvRadiusMax = (1.f + outerInvRadiusErrorBound) / outerRadius; + outerInvRadiusMin = alpaka::math::max(acc, 0.f, (1.f - outerInvRadiusErrorBound) / outerRadius); + + return checkIntervalOverlap(innerInvRadiusMin, innerInvRadiusMax, alpaka::math::min(acc, bridgeInvRadiusMin, 1.0f/bridgeRadiusMax2S), alpaka::math::max(acc, bridgeInvRadiusMax, 1.0f/bridgeRadiusMin2S)); + }; + + /*bounds for high Pt taken from : http://uaf-10.t2.ucsd.edu/~bsathian/SDL/T5_efficiency/efficiencies/new_efficiencies/efficiencies_20210513_T5_recovering_high_Pt_efficiencies/highE_radius_matching/highE_bounds.txt */ + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool matchRadiiBBBBB(TAcc const & acc, const float& innerRadius, const float& bridgeRadius, const float& outerRadius, float& innerInvRadiusMin, float& innerInvRadiusMax, float& bridgeInvRadiusMin, float& bridgeInvRadiusMax, float& outerInvRadiusMin, float& outerInvRadiusMax) + { + float innerInvRadiusErrorBound = 0.1512f; + float bridgeInvRadiusErrorBound = 0.1781f; + float outerInvRadiusErrorBound = 0.1840f; + + if(innerRadius > 2.0f/(2.f * k2Rinv1GeVf)) + { + innerInvRadiusErrorBound = 0.4449f; + bridgeInvRadiusErrorBound = 0.4033f; + outerInvRadiusErrorBound = 0.8016f; + } + + innerInvRadiusMax = (1.f + innerInvRadiusErrorBound) / innerRadius; + innerInvRadiusMin = alpaka::math::max(acc, 0.f, (1.f - innerInvRadiusErrorBound) / innerRadius); + + bridgeInvRadiusMax = (1.f + bridgeInvRadiusErrorBound) / bridgeRadius; + bridgeInvRadiusMin = alpaka::math::max(acc, 0.f, (1.f - bridgeInvRadiusErrorBound) / bridgeRadius); + + outerInvRadiusMax = (1.f + outerInvRadiusErrorBound) / outerRadius; + outerInvRadiusMin = alpaka::math::max(acc, 0.f, (1.f - outerInvRadiusErrorBound) / 
outerRadius); + + return checkIntervalOverlap(innerInvRadiusMin, innerInvRadiusMax, bridgeInvRadiusMin, bridgeInvRadiusMax); + }; + + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool matchRadiiBBBBE(TAcc const & acc, const float& innerRadius, const float& bridgeRadius, const float& outerRadius, const float& innerRadiusMin2S, const float& innerRadiusMax2S, const float& bridgeRadiusMin2S, const float& bridgeRadiusMax2S, const float& outerRadiusMin2S, const float& outerRadiusMax2S, float& innerInvRadiusMin, float& innerInvRadiusMax, float& bridgeInvRadiusMin, float& bridgeInvRadiusMax, float& outerInvRadiusMin, float& outerInvRadiusMax) + { + float innerInvRadiusErrorBound = 0.1781f; + float bridgeInvRadiusErrorBound = 0.2167f; + float outerInvRadiusErrorBound = 1.1116f; + + if(innerRadius > 2.0f/(2.f * k2Rinv1GeVf)) + { + innerInvRadiusErrorBound = 0.4750f; + bridgeInvRadiusErrorBound = 0.3903f; + outerInvRadiusErrorBound = 15.2120f; + } + + innerInvRadiusMax = (1.f + innerInvRadiusErrorBound) / innerRadius; + innerInvRadiusMin = alpaka::math::max(acc, 0.f, (1.f - innerInvRadiusErrorBound) / innerRadius); + + bridgeInvRadiusMax = (1.f + bridgeInvRadiusErrorBound) / bridgeRadius; + bridgeInvRadiusMin = alpaka::math::max(acc, 0.f, (1.f - bridgeInvRadiusErrorBound) / bridgeRadius); + + outerInvRadiusMax = (1.f + outerInvRadiusErrorBound) / outerRadius; + outerInvRadiusMin = alpaka::math::max(acc, 0.f, (1.f - outerInvRadiusErrorBound) / outerRadius); + + return checkIntervalOverlap(innerInvRadiusMin, innerInvRadiusMax, bridgeInvRadiusMin, bridgeInvRadiusMax); + }; + + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool matchRadiiBBBEE(TAcc const & acc, const float& innerRadius, const float& bridgeRadius, const float& outerRadius, const float& innerRadiusMin2S, const float& innerRadiusMax2S, const float& bridgeRadiusMin2S, const float& bridgeRadiusMax2S, const float& outerRadiusMin2S, const float& outerRadiusMax2S, float& innerInvRadiusMin, float& innerInvRadiusMax, float& 
bridgeInvRadiusMin, float& bridgeInvRadiusMax, float& outerInvRadiusMin, float& outerInvRadiusMax) + { + float innerInvRadiusErrorBound = 0.1840f; + float bridgeInvRadiusErrorBound = 0.5971f; + float outerInvRadiusErrorBound = 11.7102f; + + if(innerRadius > 2.0f/(2.f * k2Rinv1GeVf)) //as good as no selections + { + innerInvRadiusErrorBound = 1.0412f; + outerInvRadiusErrorBound = 32.2737f; + bridgeInvRadiusErrorBound = 10.9688f; + } + + innerInvRadiusMax = (1.f + innerInvRadiusErrorBound) / innerRadius; + innerInvRadiusMin = alpaka::math::max(acc, 0.f, (1.f - innerInvRadiusErrorBound) / innerRadius); + + bridgeInvRadiusMax = (1.f + bridgeInvRadiusErrorBound) / bridgeRadius; + bridgeInvRadiusMin = alpaka::math::max(acc, 0.f, (1.f - bridgeInvRadiusErrorBound) / bridgeRadius); + + outerInvRadiusMax = (1.f + outerInvRadiusErrorBound) / outerRadius; + outerInvRadiusMin = alpaka::math::max(acc, 0.f, (1.f - outerInvRadiusErrorBound) / outerRadius); + + return checkIntervalOverlap(innerInvRadiusMin, innerInvRadiusMax, alpaka::math::min(acc, bridgeInvRadiusMin, 1.0f/bridgeRadiusMax2S), alpaka::math::max(acc, bridgeInvRadiusMax, 1.0f/bridgeRadiusMin2S)); + }; + + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool matchRadiiBBBEE23478(TAcc const & acc, const float& innerRadius, const float& bridgeRadius, const float& outerRadius, const float& innerRadiusMin2S, const float& innerRadiusMax2S, const float& bridgeRadiusMin2S, const float& bridgeRadiusMax2S, const float& outerRadiusMin2S, const float& outerRadiusMax2S, float& innerInvRadiusMin, float& innerInvRadiusMax, float& bridgeInvRadiusMin, float& bridgeInvRadiusMax, float& outerInvRadiusMin, float& outerInvRadiusMax) + { + float innerInvRadiusErrorBound = 0.2097f; + float bridgeInvRadiusErrorBound = 0.8557f; + float outerInvRadiusErrorBound = 24.0450f; + + innerInvRadiusMax = (1.f + innerInvRadiusErrorBound) / innerRadius; + innerInvRadiusMin = alpaka::math::max(acc, 0.f, (1.f - innerInvRadiusErrorBound) / innerRadius); + + 
bridgeInvRadiusMax = (1.f + bridgeInvRadiusErrorBound) / bridgeRadius; + bridgeInvRadiusMin = alpaka::math::max(acc, 0.f, (1.f - bridgeInvRadiusErrorBound) / bridgeRadius); + + outerInvRadiusMax = (1.f + outerInvRadiusErrorBound) / outerRadius; + outerInvRadiusMin = alpaka::math::max(acc, 0.f, (1.f - outerInvRadiusErrorBound) / outerRadius); + + return checkIntervalOverlap(innerInvRadiusMin, innerInvRadiusMax, alpaka::math::min(acc, bridgeInvRadiusMin, 1.0f/bridgeRadiusMax2S), alpaka::math::max(acc, bridgeInvRadiusMax, 1.0f/bridgeRadiusMin2S)); + }; + + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool matchRadiiBBBEE34578(TAcc const & acc, const float& innerRadius, const float& bridgeRadius, const float& outerRadius, const float& innerRadiusMin2S, const float& innerRadiusMax2S, const float& bridgeRadiusMin2S, const float& bridgeRadiusMax2S, const float& outerRadiusMin2S, const float& outerRadiusMax2S, float& innerInvRadiusMin, float& innerInvRadiusMax, float& bridgeInvRadiusMin, float& bridgeInvRadiusMax, float& outerInvRadiusMin, float& outerInvRadiusMax) + { + float innerInvRadiusErrorBound = 0.066f; + float bridgeInvRadiusErrorBound = 0.617f; + float outerInvRadiusErrorBound = 2.688f; + + innerInvRadiusMax = (1.f + innerInvRadiusErrorBound) / innerRadius; + innerInvRadiusMin = alpaka::math::max(acc, 0.f, (1.f - innerInvRadiusErrorBound) / innerRadius); + + bridgeInvRadiusMax = (1.f + bridgeInvRadiusErrorBound) / bridgeRadius; + bridgeInvRadiusMin = alpaka::math::max(acc, 0.f, (1.f - bridgeInvRadiusErrorBound) / bridgeRadius); + + outerInvRadiusMax = (1.f + outerInvRadiusErrorBound) / outerRadius; + outerInvRadiusMin = alpaka::math::max(acc, 0.f, (1.f - outerInvRadiusErrorBound) / outerRadius); + + return checkIntervalOverlap(innerInvRadiusMin, innerInvRadiusMax, alpaka::math::min(acc, bridgeInvRadiusMin, 1.0f/bridgeRadiusMax2S), alpaka::math::max(acc, bridgeInvRadiusMax, 1.0f/bridgeRadiusMin2S)); + }; + + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool 
matchRadiiBBEEE(TAcc const & acc, const float& innerRadius, const float& bridgeRadius, const float& outerRadius, const float& innerRadiusMin2S, const float& innerRadiusMax2S, const float& bridgeRadiusMin2S, const float& bridgeRadiusMax2S, const float& outerRadiusMin2S, const float& outerRadiusMax2S, float& innerInvRadiusMin, float& innerInvRadiusMax, float& bridgeInvRadiusMin, float& bridgeInvRadiusMax, float& outerInvRadiusMin, float& outerInvRadiusMax) + { + float innerInvRadiusErrorBound = 0.6376f; + float bridgeInvRadiusErrorBound = 2.1381f; + float outerInvRadiusErrorBound = 20.4179f; + + if(innerRadius > 2.0f/(2.f * k2Rinv1GeVf)) //as good as no selections! + { + innerInvRadiusErrorBound = 12.9173f; + outerInvRadiusErrorBound = 25.6702f; + bridgeInvRadiusErrorBound = 5.1700f; + } + + innerInvRadiusMax = (1.f + innerInvRadiusErrorBound) / innerRadius; + innerInvRadiusMin = alpaka::math::max(acc, 0.f, (1.f - innerInvRadiusErrorBound) / innerRadius); + + bridgeInvRadiusMax = (1.f + bridgeInvRadiusErrorBound) / bridgeRadius; + bridgeInvRadiusMin = alpaka::math::max(acc, 0.f, (1.f - bridgeInvRadiusErrorBound) / bridgeRadius); + + outerInvRadiusMax = (1.f + outerInvRadiusErrorBound) / outerRadius; + outerInvRadiusMin = alpaka::math::max(acc, 0.f, (1.f - outerInvRadiusErrorBound) / outerRadius); + + return checkIntervalOverlap(innerInvRadiusMin, innerInvRadiusMax, alpaka::math::min(acc, bridgeInvRadiusMin, 1.0f/bridgeRadiusMax2S), alpaka::math::max(acc, bridgeInvRadiusMax, 1.0f/bridgeRadiusMin2S)); + }; + + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool matchRadiiBEEEE(TAcc const & acc, const float& innerRadius, const float& bridgeRadius, const float& outerRadius, const float& innerRadiusMin2S, const float& innerRadiusMax2S, const float& bridgeRadiusMin2S, const float& bridgeRadiusMax2S, const float& outerRadiusMin2S, const float& outerRadiusMax2S, float& innerInvRadiusMin, float& innerInvRadiusMax, float& bridgeInvRadiusMin, float& bridgeInvRadiusMax, float& 
outerInvRadiusMin, float& outerInvRadiusMax) + { + float innerInvRadiusErrorBound = 1.9382f; + float bridgeInvRadiusErrorBound = 3.7280f; + float outerInvRadiusErrorBound = 5.7030f; + + if(innerRadius > 2.0f/(2.f * k2Rinv1GeVf)) + { + innerInvRadiusErrorBound = 23.2713f; + outerInvRadiusErrorBound = 24.0450f; + bridgeInvRadiusErrorBound = 21.7980f; + } + + innerInvRadiusMax = (1.f + innerInvRadiusErrorBound) / innerRadius; + innerInvRadiusMin = alpaka::math::max(acc, 0.f, (1.f - innerInvRadiusErrorBound) / innerRadius); + + bridgeInvRadiusMax = (1.f + bridgeInvRadiusErrorBound) / bridgeRadius; + bridgeInvRadiusMin = alpaka::math::max(acc, 0.f, (1.f - bridgeInvRadiusErrorBound) / bridgeRadius); + + outerInvRadiusMax = (1.f + outerInvRadiusErrorBound) / outerRadius; + outerInvRadiusMin = alpaka::math::max(acc, 0.f, (1.f - outerInvRadiusErrorBound) / outerRadius); + + return checkIntervalOverlap(alpaka::math::min(acc, innerInvRadiusMin, 1.0/innerRadiusMax2S), alpaka::math::max(acc, innerInvRadiusMax, 1.0/innerRadiusMin2S), alpaka::math::min(acc, bridgeInvRadiusMin, 1.0/bridgeRadiusMax2S), alpaka::math::max(acc, bridgeInvRadiusMax, 1.0/bridgeRadiusMin2S)); + }; + + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool matchRadiiEEEEE(TAcc const & acc, const float& innerRadius, const float& bridgeRadius, const float& outerRadius, const float& innerRadiusMin2S, const float& innerRadiusMax2S, const float& bridgeRadiusMin2S, const float& bridgeRadiusMax2S, const float& outerRadiusMin2S, const float& outerRadiusMax2S, float& innerInvRadiusMin, float& innerInvRadiusMax, float& bridgeInvRadiusMin, float& bridgeInvRadiusMax, float& outerInvRadiusMin, float& outerInvRadiusMax) + { + float innerInvRadiusErrorBound = 1.9382f; + float bridgeInvRadiusErrorBound = 2.2091f; + float outerInvRadiusErrorBound = 7.4084f; + + if(innerRadius > 2.0f/(2.f * k2Rinv1GeVf)) + { + innerInvRadiusErrorBound = 22.5226f; + bridgeInvRadiusErrorBound = 21.0966f; + outerInvRadiusErrorBound = 19.1252f; + } + + 
innerInvRadiusMax = (1.f + innerInvRadiusErrorBound) / innerRadius; + innerInvRadiusMin = alpaka::math::max(acc, 0.f, (1.f - innerInvRadiusErrorBound) / innerRadius); + + bridgeInvRadiusMax = (1.f + bridgeInvRadiusErrorBound) / bridgeRadius; + bridgeInvRadiusMin = alpaka::math::max(acc, 0.f, (1.f - bridgeInvRadiusErrorBound) / bridgeRadius); + + outerInvRadiusMax = (1.f + outerInvRadiusErrorBound) / outerRadius; + outerInvRadiusMin = alpaka::math::max(acc, 0.f, (1.f - outerInvRadiusErrorBound) / outerRadius); + + return checkIntervalOverlap(alpaka::math::min(acc, innerInvRadiusMin, 1.0/innerRadiusMax2S), alpaka::math::max(acc, innerInvRadiusMax, 1.0/innerRadiusMin2S), alpaka::math::min(acc, bridgeInvRadiusMin, 1.0/bridgeRadiusMax2S), alpaka::math::max(acc, bridgeInvRadiusMax, 1.0/bridgeRadiusMin2S)); + }; + + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE void computeSigmasForRegression(TAcc const & acc, SDL::modules& modulesInGPU, const uint16_t* lowerModuleIndices, float* delta1, float* delta2, float* slopes, bool* isFlat, int nPoints = 5, bool anchorHits = true) + { + /* + Bool anchorHits required to deal with a weird edge case wherein + the hits ultimately used in the regression are anchor hits, but the + lower modules need not all be Pixel Modules (in case of PS). Similarly, + when we compute the chi squared for the non-anchor hits, the "partner module" + need not always be a PS strip module, but all non-anchor hits sit on strip + modules. 
+ */ + + ModuleType moduleType; + short moduleSubdet, moduleSide; + float inv1 = 0.01f/0.009f; + float inv2 = 0.15f/0.009f; + float inv3 = 2.4f/0.009f; + for(size_t i = 0; i < nPoints; i++) + { + moduleType = modulesInGPU.moduleType[lowerModuleIndices[i]]; + moduleSubdet = modulesInGPU.subdets[lowerModuleIndices[i]]; + moduleSide = modulesInGPU.sides[lowerModuleIndices[i]]; + float& drdz = modulesInGPU.drdzs[lowerModuleIndices[i]]; + slopes[i] = modulesInGPU.slopes[lowerModuleIndices[i]]; + //category 1 - barrel PS flat + if(moduleSubdet == Barrel and moduleType == PS and moduleSide == Center) + { + delta1[i] = inv1; + delta2[i] = inv1; + slopes[i] = -999.f; + isFlat[i] = true; + } + //category 2 - barrel 2S + else if(moduleSubdet == Barrel and moduleType == TwoS) + { + delta1[i] = 1.f; + delta2[i] = 1.f; + slopes[i] = -999.f; + isFlat[i] = true; + } + //category 3 - barrel PS tilted + else if(moduleSubdet == Barrel and moduleType == PS and moduleSide != Center) + { + delta1[i] = inv1; + isFlat[i] = false; + + if(anchorHits) + { + delta2[i] = (inv2 * drdz/alpaka::math::sqrt(acc, 1 + drdz * drdz)); + } + else + { + delta2[i] = (inv3 * drdz/alpaka::math::sqrt(acc, 1 + drdz * drdz)); + } + } + //category 4 - endcap PS + else if(moduleSubdet == Endcap and moduleType == PS) + { + delta1[i] = inv1; + isFlat[i] = false; + + /* + despite the type of the module layer of the lower module index, + all anchor hits are on the pixel side and all non-anchor hits are + on the strip side! + */ + if(anchorHits) + { + delta2[i] = inv2; + } + else + { + delta2[i] = inv3; + } + } + //category 5 - endcap 2S + else if(moduleSubdet == Endcap and moduleType == TwoS) + { + delta1[i] = 1.f; + delta2[i] = 500.f*inv1; + isFlat[i] = false; + } + else + { + printf("ERROR!!!!! I SHOULDN'T BE HERE!!!! 
subdet = %d, type = %d, side = %d\n", moduleSubdet, moduleType, moduleSide); + } + } + }; + + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE float computeRadiusUsingRegression(TAcc const & acc, int nPoints, float* xs, float* ys, float* delta1, float* delta2, float* slopes, bool* isFlat, float& g, float& f, float* sigmas, float& chiSquared) + { + float radius = 0.f; + + // Some extra variables + // the two variables will be caled x1 and x2, and y (which is x^2 + y^2) + + float sigmaX1Squared = 0.f; + float sigmaX2Squared = 0.f; + float sigmaX1X2 = 0.f; + float sigmaX1y = 0.f; + float sigmaX2y = 0.f; + float sigmaY = 0.f; + float sigmaX1 = 0.f; + float sigmaX2 = 0.f; + float sigmaOne = 0.f; + + float xPrime, yPrime, absArctanSlope, angleM; + for(size_t i = 0; i < nPoints; i++) + { + // Computing sigmas is a very tricky affair + // if the module is tilted or endcap, we need to use the slopes properly! + + absArctanSlope = ((slopes[i] != SDL::SDL_INF) ? alpaka::math::abs(acc, alpaka::math::atan(acc, slopes[i])) : 0.5f*float(M_PI)); + + if(xs[i] > 0 and ys[i] > 0) + { + angleM = 0.5f*float(M_PI) - absArctanSlope; + } + else if(xs[i] < 0 and ys[i] > 0) + { + angleM = absArctanSlope + 0.5f*float(M_PI); + } + else if(xs[i] < 0 and ys[i] < 0) + { + angleM = -(absArctanSlope + 0.5f*float(M_PI)); + } + else if(xs[i] > 0 and ys[i] < 0) + { + angleM = -(0.5f*float(M_PI) - absArctanSlope); + } + + if(not isFlat[i]) + { + xPrime = xs[i] * alpaka::math::cos(acc, angleM) + ys[i] * alpaka::math::sin(acc, angleM); + yPrime = ys[i] * alpaka::math::cos(acc, angleM) - xs[i] * alpaka::math::sin(acc, angleM); + } + else + { + xPrime = xs[i]; + yPrime = ys[i]; + } + sigmas[i] = 2 * alpaka::math::sqrt(acc, (xPrime * delta1[i]) * (xPrime * delta1[i]) + (yPrime * delta2[i]) * (yPrime * delta2[i])); + + sigmaX1Squared += (xs[i] * xs[i])/(sigmas[i] * sigmas[i]); + sigmaX2Squared += (ys[i] * ys[i])/(sigmas[i] * sigmas[i]); + sigmaX1X2 += (xs[i] * ys[i])/(sigmas[i] * sigmas[i]); + sigmaX1y += (xs[i] * 
(xs[i] * xs[i] + ys[i] * ys[i]))/(sigmas[i] * sigmas[i]); + sigmaX2y += (ys[i] * (xs[i] * xs[i] + ys[i] * ys[i]))/(sigmas[i] * sigmas[i]); + sigmaY += (xs[i] * xs[i] + ys[i] * ys[i])/(sigmas[i] * sigmas[i]); + sigmaX1 += xs[i]/(sigmas[i] * sigmas[i]); + sigmaX2 += ys[i]/(sigmas[i] * sigmas[i]); + sigmaOne += 1.0f/(sigmas[i] * sigmas[i]); + } + float denominator = (sigmaX1X2 - sigmaX1 * sigmaX2) * (sigmaX1X2 - sigmaX1 * sigmaX2) - (sigmaX1Squared - sigmaX1 * sigmaX1) * (sigmaX2Squared - sigmaX2 * sigmaX2); + + float twoG = ((sigmaX2y - sigmaX2 * sigmaY) * (sigmaX1X2 - sigmaX1 * sigmaX2) - (sigmaX1y - sigmaX1 * sigmaY) * (sigmaX2Squared - sigmaX2 * sigmaX2)) / denominator; + float twoF = ((sigmaX1y - sigmaX1 * sigmaY) * (sigmaX1X2 - sigmaX1 * sigmaX2) - (sigmaX2y - sigmaX2 * sigmaY) * (sigmaX1Squared - sigmaX1 * sigmaX1)) / denominator; + + float c = -(sigmaY - twoG * sigmaX1 - twoF * sigmaX2)/sigmaOne; + g = 0.5f * twoG; + f = 0.5f * twoF; + if(g * g + f * f - c < 0) + { + printf("FATAL! r^2 < 0!\n"); + return -1; + } + + radius = alpaka::math::sqrt(acc, g * g + f * f - c); + // compute chi squared + chiSquared = 0.f; + for(size_t i = 0; i < nPoints; i++) + { + chiSquared += (xs[i] * xs[i] + ys[i] * ys[i] - twoG * xs[i] - twoF * ys[i] + c) * (xs[i] * xs[i] + ys[i] * ys[i] - twoG * xs[i] - twoF * ys[i] + c) / (sigmas[i] * sigmas[i]); + } + return radius; + }; + + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE float computeChiSquared(TAcc const & acc, int nPoints, float* xs, float* ys, float* delta1, float* delta2, float* slopes, bool* isFlat, float g, float f, float radius) + { + // given values of (g, f, radius) and a set of points (and its uncertainties) + // compute chi squared + float c = g*g + f*f - radius*radius; + float chiSquared = 0.f; + float absArctanSlope, angleM, xPrime, yPrime, sigma; + for(size_t i = 0; i < nPoints; i++) + { + absArctanSlope = ((slopes[i] != SDL::SDL_INF) ? 
alpaka::math::abs(acc, alpaka::math::atan(acc, slopes[i])) : 0.5f*float(M_PI)); + if(xs[i] > 0 and ys[i] > 0) + { + angleM = 0.5f*float(M_PI) - absArctanSlope; + } + else if(xs[i] < 0 and ys[i] > 0) + { + angleM = absArctanSlope + 0.5f*float(M_PI); + } + else if(xs[i] < 0 and ys[i] < 0) + { + angleM = -(absArctanSlope + 0.5f*float(M_PI)); + } + else if(xs[i] > 0 and ys[i] < 0) + { + angleM = -(0.5f*float(M_PI) - absArctanSlope); + } + + if(not isFlat[i]) + { + xPrime = xs[i] * alpaka::math::cos(acc, angleM) + ys[i] *alpaka::math::sin(acc, angleM); + yPrime = ys[i] * alpaka::math::cos(acc, angleM) - xs[i] *alpaka::math::sin(acc, angleM); + } + else + { + xPrime = xs[i]; + yPrime = ys[i]; + } + sigma = 2 * alpaka::math::sqrt(acc, (xPrime * delta1[i]) * (xPrime * delta1[i]) + (yPrime * delta2[i]) * (yPrime * delta2[i])); + chiSquared += (xs[i] * xs[i] + ys[i] * ys[i] - 2 * g * xs[i] - 2 * f * ys[i] + c) * (xs[i] * xs[i] + ys[i] * ys[i] - 2 * g * xs[i] - 2 * f * ys[i] + c) / (sigma * sigma); + } + return chiSquared; + }; + + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE void runDeltaBetaIterationsT5(TAcc const & acc, float& betaIn, float& betaOut, float& betaAv, float & pt_beta, float sdIn_dr, float sdOut_dr, float dr, float lIn) + { + if (lIn == 0) + { + betaOut += SDL::copysignf_alpaka(alpaka::math::asin(acc, alpaka::math::min(acc, sdOut_dr * SDL::k2Rinv1GeVf / alpaka::math::abs(acc, pt_beta), SDL::sinAlphaMax)), betaOut); + return; + } + + if (betaIn * betaOut > 0.f and (alpaka::math::abs(acc, pt_beta) < 4.f * SDL::pt_betaMax or (lIn >= 11 and alpaka::math::abs(acc, pt_beta) < 8.f * SDL::pt_betaMax))) //and the pt_beta is well-defined; less strict for endcap-endcap + { + const float betaInUpd = betaIn + SDL::copysignf_alpaka(alpaka::math::asin(acc, alpaka::math::min(acc, sdIn_dr * SDL::k2Rinv1GeVf / alpaka::math::abs(acc, pt_beta), SDL::sinAlphaMax)), betaIn); //FIXME: need a faster version + const float betaOutUpd = betaOut + 
SDL::copysignf_alpaka(alpaka::math::asin(acc, alpaka::math::min(acc, sdOut_dr * SDL::k2Rinv1GeVf / alpaka::math::abs(acc, pt_beta), SDL::sinAlphaMax)), betaOut); //FIXME: need a faster version + betaAv = 0.5f * (betaInUpd + betaOutUpd); + + //1st update + const float pt_beta_inv = 1.f/alpaka::math::abs(acc, dr * k2Rinv1GeVf /alpaka::math::sin(acc, betaAv)); //get a better pt estimate + + betaIn += SDL::copysignf_alpaka(alpaka::math::asin(acc, alpaka::math::min(acc, sdIn_dr * SDL::k2Rinv1GeVf *pt_beta_inv, SDL::sinAlphaMax)), betaIn); //FIXME: need a faster version + betaOut += SDL::copysignf_alpaka(alpaka::math::asin(acc, alpaka::math::min(acc, sdOut_dr * SDL::k2Rinv1GeVf *pt_beta_inv, SDL::sinAlphaMax)), betaOut); //FIXME: need a faster version + //update the av and pt + betaAv = 0.5f * (betaIn + betaOut); + //2nd update + pt_beta = dr * SDL::k2Rinv1GeVf /alpaka::math::sin(acc, betaAv); //get a better pt estimate + } + else if (lIn < 11 && alpaka::math::abs(acc, betaOut) < 0.2f * alpaka::math::abs(acc, betaIn) && alpaka::math::abs(acc, pt_beta) < 12.f * SDL::pt_betaMax) //use betaIn sign as ref + { + const float pt_betaIn = dr * k2Rinv1GeVf /alpaka::math::sin(acc, betaIn); + + const float betaInUpd = betaIn + SDL::copysignf_alpaka(alpaka::math::asin(acc, alpaka::math::min(acc, sdIn_dr * SDL::k2Rinv1GeVf / alpaka::math::abs(acc, pt_betaIn), SDL::sinAlphaMax)), betaIn); //FIXME: need a faster version + const float betaOutUpd = betaOut + SDL::copysignf_alpaka(alpaka::math::asin(acc, alpaka::math::min(acc, sdOut_dr * SDL::k2Rinv1GeVf / alpaka::math::abs(acc, pt_betaIn), SDL::sinAlphaMax)), betaIn); //FIXME: need a faster version + betaAv = (alpaka::math::abs(acc, betaOut) > 0.2f * alpaka::math::abs(acc, betaIn)) ? 
(0.5f * (betaInUpd + betaOutUpd)) : betaInUpd; + + //1st update + pt_beta = dr * SDL::k2Rinv1GeVf / sin(betaAv); //get a better pt estimate + betaIn += SDL::copysignf_alpaka(alpaka::math::asin(acc, alpaka::math::min(acc, sdIn_dr * SDL::k2Rinv1GeVf / alpaka::math::abs(acc, pt_beta), SDL::sinAlphaMax)), betaIn); //FIXME: need a faster version + betaOut += SDL::copysignf_alpaka(alpaka::math::asin(acc, alpaka::math::min(acc, sdOut_dr * SDL::k2Rinv1GeVf / alpaka::math::abs(acc, pt_beta), SDL::sinAlphaMax)), betaIn); //FIXME: need a faster version + //update the av and pt + betaAv = 0.5f * (betaIn + betaOut); + //2nd update + pt_beta = dr * SDL::k2Rinv1GeVf / sin(betaAv); //get a better pt estimate + } + }; + + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runQuintupletDefaultAlgoBBBB(TAcc const & acc, struct SDL::modules& modulesInGPU, struct SDL::miniDoublets& mdsInGPU, struct SDL::segments& segmentsInGPU, uint16_t& innerInnerLowerModuleIndex, uint16_t& innerOuterLowerModuleIndex, uint16_t& outerInnerLowerModuleIndex, uint16_t& outerOuterLowerModuleIndex, unsigned int& innerSegmentIndex, unsigned int& outerSegmentIndex, unsigned int& firstMDIndex, unsigned int& secondMDIndex, unsigned int& thirdMDIndex, unsigned int& fourthMDIndex, float& zOut, float& rtOut, float& deltaPhiPos, float& dPhi, float& betaIn, float&betaOut, float& pt_beta, float& zLo, float& zHi, float& zLoPointed, float& zHiPointed, float& sdlCut, float& betaInCut, float& betaOutCut, float& deltaBetaCut) + { + bool pass = true; + + bool isPS_InLo = (modulesInGPU.moduleType[innerInnerLowerModuleIndex] == SDL::PS); + bool isPS_OutLo = (modulesInGPU.moduleType[outerInnerLowerModuleIndex] == SDL::PS); + + float rt_InLo = mdsInGPU.anchorRt[firstMDIndex]; + float rt_InOut = mdsInGPU.anchorRt[secondMDIndex]; + float rt_OutLo = mdsInGPU.anchorRt[thirdMDIndex]; + + float z_InLo = mdsInGPU.anchorZ[firstMDIndex]; + float z_InOut = mdsInGPU.anchorZ[secondMDIndex]; + float z_OutLo = mdsInGPU.anchorZ[thirdMDIndex]; + 
+ float alpha1GeV_OutLo = alpaka::math::asin(acc, alpaka::math::min(acc, rt_OutLo * SDL::k2Rinv1GeVf / SDL::ptCut, SDL::sinAlphaMax)); + + float rtRatio_OutLoInLo = rt_OutLo / rt_InLo; // Outer segment beginning rt divided by inner segment beginning rt; + float dzDrtScale = alpaka::math::tan(acc, alpha1GeV_OutLo) / alpha1GeV_OutLo; // The track can bend in r-z plane slightly + float zpitch_InLo = (isPS_InLo ? SDL::pixelPSZpitch : SDL::strip2SZpitch); + float zpitch_OutLo = (isPS_OutLo ? SDL::pixelPSZpitch : SDL::strip2SZpitch); + + zHi = z_InLo + (z_InLo + SDL::deltaZLum) * (rtRatio_OutLoInLo - 1.f) * (z_InLo < 0.f ? 1.f : dzDrtScale) + (zpitch_InLo + zpitch_OutLo); + zLo = z_InLo + (z_InLo - SDL::deltaZLum) * (rtRatio_OutLoInLo - 1.f) * (z_InLo > 0.f ? 1.f : dzDrtScale) - (zpitch_InLo + zpitch_OutLo); + + //Cut 1 - z compatibility + zOut = z_OutLo; + rtOut = rt_OutLo; + pass = pass and ((z_OutLo >= zLo) & (z_OutLo <= zHi)); + if(not pass) return pass; + + float drt_OutLo_InLo = (rt_OutLo - rt_InLo); + float r3_InLo = alpaka::math::sqrt(acc, z_InLo * z_InLo + rt_InLo * rt_InLo); + float drt_InSeg = rt_InOut - rt_InLo; + float dz_InSeg = z_InOut - z_InLo; + float dr3_InSeg = alpaka::math::sqrt(acc, rt_InOut * rt_InOut + z_InOut * z_InOut) - alpaka::math::sqrt(acc, rt_InLo * rt_InLo + z_InLo * z_InLo); + + float coshEta = dr3_InSeg/drt_InSeg; + float dzErr = (zpitch_InLo + zpitch_OutLo) * (zpitch_InLo + zpitch_OutLo) * 2.f; + + float sdlThetaMulsF = 0.015f * alpaka::math::sqrt(acc, 0.1f + 0.2f * (rt_OutLo - rt_InLo) / 50.f) * alpaka::math::sqrt(acc, r3_InLo / rt_InLo); + float sdlMuls = sdlThetaMulsF * 3.f / SDL::ptCut * 4.f; // will need a better guess than x4? 
+ dzErr += sdlMuls * sdlMuls * drt_OutLo_InLo * drt_OutLo_InLo / 3.f * coshEta * coshEta; //sloppy + dzErr = alpaka::math::sqrt(acc, dzErr); + + // Constructing upper and lower bound + const float dzMean = dz_InSeg / drt_InSeg * drt_OutLo_InLo; + const float zWindow = dzErr / drt_InSeg * drt_OutLo_InLo + (zpitch_InLo + zpitch_OutLo); //FIXME for SDL::ptCut lower than ~0.8 need to add curv path correction + zLoPointed = z_InLo + dzMean * (z_InLo > 0.f ? 1.f : dzDrtScale) - zWindow; + zHiPointed = z_InLo + dzMean * (z_InLo < 0.f ? 1.f : dzDrtScale) + zWindow; + + // Cut #2: Pointed Z (Inner segment two MD points to outer segment inner MD) + pass = pass and ((z_OutLo >= zLoPointed) & (z_OutLo <= zHiPointed)); + if(not pass) return pass; + + float sdlPVoff = 0.1f/rt_OutLo; + sdlCut = alpha1GeV_OutLo + alpaka::math::sqrt(acc, sdlMuls * sdlMuls + sdlPVoff * sdlPVoff); + + deltaPhiPos = SDL::deltaPhi_alpaka(acc, mdsInGPU.anchorX[secondMDIndex], mdsInGPU.anchorY[secondMDIndex], mdsInGPU.anchorX[fourthMDIndex], mdsInGPU.anchorY[fourthMDIndex]); + // Cut #3: FIXME:deltaPhiPos can be tighter + pass = pass and (alpaka::math::abs(acc, deltaPhiPos) <= sdlCut); + if(not pass) return pass; + + float midPointX = 0.5f*(mdsInGPU.anchorX[firstMDIndex] + mdsInGPU.anchorX[thirdMDIndex]); + float midPointY = 0.5f* (mdsInGPU.anchorY[firstMDIndex] + mdsInGPU.anchorY[thirdMDIndex]); + float midPointZ = 0.5f*(mdsInGPU.anchorZ[firstMDIndex] + mdsInGPU.anchorZ[thirdMDIndex]); + float diffX = mdsInGPU.anchorX[thirdMDIndex] - mdsInGPU.anchorX[firstMDIndex]; + float diffY = mdsInGPU.anchorY[thirdMDIndex] - mdsInGPU.anchorY[firstMDIndex]; + float diffZ = mdsInGPU.anchorZ[thirdMDIndex] - mdsInGPU.anchorZ[firstMDIndex]; + + dPhi = SDL::deltaPhi_alpaka(acc, midPointX, midPointY, diffX, diffY); + + // Cut #4: deltaPhiChange + pass = pass and (alpaka::math::abs(acc, dPhi) <= sdlCut); + //lots of array accesses below. Cut here! 
+ if(not pass) return pass; + + // First obtaining the raw betaIn and betaOut values without any correction and just purely based on the mini-doublet hit positions + + float alpha_InLo = __H2F(segmentsInGPU.dPhiChanges[innerSegmentIndex]); + float alpha_OutLo = __H2F(segmentsInGPU.dPhiChanges[outerSegmentIndex]); + + bool isEC_lastLayer = modulesInGPU.subdets[outerOuterLowerModuleIndex] == SDL::Endcap and modulesInGPU.moduleType[outerOuterLowerModuleIndex] == SDL::TwoS; + + float alpha_OutUp,alpha_OutUp_highEdge,alpha_OutUp_lowEdge; + + alpha_OutUp = SDL::deltaPhi_alpaka(acc, mdsInGPU.anchorX[fourthMDIndex], mdsInGPU.anchorY[fourthMDIndex], mdsInGPU.anchorX[fourthMDIndex] - mdsInGPU.anchorX[thirdMDIndex], mdsInGPU.anchorY[fourthMDIndex] - mdsInGPU.anchorY[thirdMDIndex]); + + alpha_OutUp_highEdge = alpha_OutUp; + alpha_OutUp_lowEdge = alpha_OutUp; + + float tl_axis_x = mdsInGPU.anchorX[fourthMDIndex] - mdsInGPU.anchorX[firstMDIndex]; + float tl_axis_y = mdsInGPU.anchorY[fourthMDIndex] - mdsInGPU.anchorY[firstMDIndex]; + float tl_axis_z = mdsInGPU.anchorZ[fourthMDIndex] - mdsInGPU.anchorZ[firstMDIndex]; + float tl_axis_highEdge_x = tl_axis_x; + float tl_axis_highEdge_y = tl_axis_y; + float tl_axis_lowEdge_x = tl_axis_x; + float tl_axis_lowEdge_y = tl_axis_y; + + betaIn = alpha_InLo - SDL::deltaPhi_alpaka(acc, mdsInGPU.anchorX[firstMDIndex], mdsInGPU.anchorY[firstMDIndex], tl_axis_x, tl_axis_y); + + float betaInRHmin = betaIn; + float betaInRHmax = betaIn; + betaOut = -alpha_OutUp + SDL::deltaPhi_alpaka(acc, mdsInGPU.anchorX[fourthMDIndex], mdsInGPU.anchorY[fourthMDIndex], tl_axis_x, tl_axis_y); + + float betaOutRHmin = betaOut; + float betaOutRHmax = betaOut; + + if(isEC_lastLayer) + { + alpha_OutUp_highEdge = SDL::deltaPhi_alpaka(acc, mdsInGPU.anchorHighEdgeX[fourthMDIndex], mdsInGPU.anchorHighEdgeY[fourthMDIndex], mdsInGPU.anchorHighEdgeX[fourthMDIndex] - mdsInGPU.anchorX[thirdMDIndex], mdsInGPU.anchorHighEdgeY[fourthMDIndex] - mdsInGPU.anchorY[thirdMDIndex]); + 
alpha_OutUp_lowEdge = SDL::deltaPhi_alpaka(acc, mdsInGPU.anchorLowEdgeX[fourthMDIndex], mdsInGPU.anchorLowEdgeY[fourthMDIndex], mdsInGPU.anchorLowEdgeX[fourthMDIndex] - mdsInGPU.anchorX[thirdMDIndex], mdsInGPU.anchorLowEdgeY[fourthMDIndex] - mdsInGPU.anchorY[thirdMDIndex]); + + tl_axis_highEdge_x = mdsInGPU.anchorHighEdgeX[fourthMDIndex] - mdsInGPU.anchorX[firstMDIndex]; + tl_axis_highEdge_y = mdsInGPU.anchorHighEdgeY[fourthMDIndex] - mdsInGPU.anchorY[firstMDIndex]; + tl_axis_lowEdge_x = mdsInGPU.anchorLowEdgeX[fourthMDIndex] - mdsInGPU.anchorX[firstMDIndex]; + tl_axis_lowEdge_y = mdsInGPU.anchorLowEdgeY[fourthMDIndex] - mdsInGPU.anchorY[firstMDIndex]; + + + betaOutRHmin = -alpha_OutUp_highEdge + SDL::deltaPhi_alpaka(acc, mdsInGPU.anchorHighEdgeX[fourthMDIndex], mdsInGPU.anchorHighEdgeY[fourthMDIndex], tl_axis_highEdge_x, tl_axis_highEdge_y); + betaOutRHmax = -alpha_OutUp_lowEdge + SDL::deltaPhi_alpaka(acc, mdsInGPU.anchorLowEdgeX[fourthMDIndex], mdsInGPU.anchorLowEdgeY[fourthMDIndex], tl_axis_lowEdge_x, tl_axis_lowEdge_y); + } + + //beta computation + float drt_tl_axis = alpaka::math::sqrt(acc, tl_axis_x * tl_axis_x + tl_axis_y * tl_axis_y); + float drt_tl_lowEdge = alpaka::math::sqrt(acc, tl_axis_lowEdge_x * tl_axis_lowEdge_x + tl_axis_lowEdge_y * tl_axis_lowEdge_y); + float drt_tl_highEdge = alpaka::math::sqrt(acc, tl_axis_highEdge_x * tl_axis_highEdge_x + tl_axis_highEdge_y * tl_axis_highEdge_y); + + float corrF = 1.f; + //innerOuterAnchor - innerInnerAnchor + const float rt_InSeg = alpaka::math::sqrt(acc, (mdsInGPU.anchorX[secondMDIndex] - mdsInGPU.anchorX[firstMDIndex]) * (mdsInGPU.anchorX[secondMDIndex] - mdsInGPU.anchorX[firstMDIndex]) + (mdsInGPU.anchorY[secondMDIndex] - mdsInGPU.anchorY[firstMDIndex]) * (mdsInGPU.anchorY[secondMDIndex] - mdsInGPU.anchorY[firstMDIndex])); + betaInCut = alpaka::math::asin(acc, alpaka::math::min(acc, (-rt_InSeg * corrF + drt_tl_axis) * SDL::k2Rinv1GeVf / SDL::ptCut, SDL::sinAlphaMax)) + (0.02f / drt_InSeg); + + //Cut #5: 
first beta cut + pass = pass and (alpaka::math::abs(acc, betaInRHmin) < betaInCut); + if(not pass) return pass; + + float betaAv = 0.5f * (betaIn + betaOut); + pt_beta = drt_tl_axis * SDL::k2Rinv1GeVf/alpaka::math::sin(acc, betaAv); + int lIn = 5; + int lOut = isEC_lastLayer ? 11 : 5; + float sdOut_dr = alpaka::math::sqrt(acc, (mdsInGPU.anchorX[fourthMDIndex] - mdsInGPU.anchorX[thirdMDIndex]) * (mdsInGPU.anchorX[fourthMDIndex] - mdsInGPU.anchorX[thirdMDIndex]) + (mdsInGPU.anchorY[fourthMDIndex] - mdsInGPU.anchorY[thirdMDIndex]) * (mdsInGPU.anchorY[fourthMDIndex] - mdsInGPU.anchorY[thirdMDIndex])); + float sdOut_d = mdsInGPU.anchorRt[fourthMDIndex] - mdsInGPU.anchorRt[thirdMDIndex]; + + const float diffDr = alpaka::math::abs(acc, rt_InSeg - sdOut_dr) / alpaka::math::abs(acc, rt_InSeg + sdOut_dr); + + SDL::runDeltaBetaIterationsT5(acc, betaIn, betaOut, betaAv, pt_beta, rt_InSeg, sdOut_dr, drt_tl_axis, lIn); + + const float betaInMMSF = (alpaka::math::abs(acc, betaInRHmin + betaInRHmax) > 0) ? (2.f * betaIn / alpaka::math::abs(acc, betaInRHmin + betaInRHmax)) : 0.f; //mean value of min,max is the old betaIn + const float betaOutMMSF = (alpaka::math::abs(acc, betaOutRHmin + betaOutRHmax) > 0) ? 
(2.f * betaOut / alpaka::math::abs(acc, betaOutRHmin + betaOutRHmax)) : 0.f; + betaInRHmin *= betaInMMSF; + betaInRHmax *= betaInMMSF; + betaOutRHmin *= betaOutMMSF; + betaOutRHmax *= betaOutMMSF; + + const float dBetaMuls = sdlThetaMulsF * 4.f / alpaka::math::min(acc, alpaka::math::abs(acc, pt_beta), SDL::pt_betaMax); //need to confimm the range-out value of 7 GeV + + const float alphaInAbsReg = alpaka::math::max(acc, alpaka::math::abs(acc, alpha_InLo), alpaka::math::asin(acc, alpaka::math::min(acc, rt_InLo * SDL::k2Rinv1GeVf / 3.0f, SDL::sinAlphaMax))); + const float alphaOutAbsReg = alpaka::math::max(acc, alpaka::math::abs(acc, alpha_OutLo), alpaka::math::asin(acc, alpaka::math::min(acc, rt_OutLo * SDL::k2Rinv1GeVf / 3.0f, SDL::sinAlphaMax))); + const float dBetaInLum = lIn < 11 ? 0.0f : alpaka::math::abs(acc, alphaInAbsReg*SDL::deltaZLum / z_InLo); + const float dBetaOutLum = lOut < 11 ? 0.0f : alpaka::math::abs(acc, alphaOutAbsReg*SDL::deltaZLum / z_OutLo); + const float dBetaLum2 = (dBetaInLum + dBetaOutLum) * (dBetaInLum + dBetaOutLum); + const float sinDPhi =alpaka::math::sin(acc, dPhi); + + const float dBetaRIn2 = 0; // TODO-RH + float dBetaROut = 0; + if(isEC_lastLayer) + { + dBetaROut = (alpaka::math::sqrt(acc, mdsInGPU.anchorHighEdgeX[fourthMDIndex] * mdsInGPU.anchorHighEdgeX[fourthMDIndex] + mdsInGPU.anchorHighEdgeY[fourthMDIndex] * mdsInGPU.anchorHighEdgeY[fourthMDIndex]) - alpaka::math::sqrt(acc, mdsInGPU.anchorLowEdgeX[fourthMDIndex] * mdsInGPU.anchorLowEdgeX[fourthMDIndex] + mdsInGPU.anchorLowEdgeY[fourthMDIndex] * mdsInGPU.anchorLowEdgeY[fourthMDIndex])) * sinDPhi / drt_tl_axis; + } + + const float dBetaROut2 = dBetaROut * dBetaROut; + + //FIXME: need faster version + betaOutCut = alpaka::math::asin(acc, alpaka::math::min(acc, drt_tl_axis*SDL::k2Rinv1GeVf / SDL::ptCut, SDL::sinAlphaMax)) + (0.02f / sdOut_d) + alpaka::math::sqrt(acc, dBetaLum2 + dBetaMuls*dBetaMuls); + + //Cut #6: The real beta cut + pass = pass and ((alpaka::math::abs(acc, 
betaOut) < betaOutCut)); + if(not pass) return pass; + + float pt_betaIn = drt_tl_axis * SDL::k2Rinv1GeVf/alpaka::math::sin(acc, betaIn); + float pt_betaOut = drt_tl_axis * SDL::k2Rinv1GeVf /alpaka::math::sin(acc, betaOut); + float dBetaRes = 0.02f/alpaka::math::min(acc, sdOut_d,drt_InSeg); + float dBetaCut2 = (dBetaRes*dBetaRes * 2.0f + dBetaMuls * dBetaMuls + dBetaLum2 + dBetaRIn2 + dBetaROut2 + 0.25f * (alpaka::math::abs(acc, betaInRHmin - betaInRHmax) + alpaka::math::abs(acc, betaOutRHmin - betaOutRHmax)) * (alpaka::math::abs(acc, betaInRHmin - betaInRHmax) + alpaka::math::abs(acc, betaOutRHmin - betaOutRHmax))); + + float dBeta = betaIn - betaOut; + deltaBetaCut = alpaka::math::sqrt(acc, dBetaCut2); + pass = pass and (dBeta * dBeta <= dBetaCut2); + + return pass; + }; + + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runQuintupletDefaultAlgoBBEE(TAcc const & acc, struct SDL::modules& modulesInGPU, struct SDL::miniDoublets& mdsInGPU, struct SDL::segments& segmentsInGPU, uint16_t& innerInnerLowerModuleIndex, uint16_t& innerOuterLowerModuleIndex, uint16_t& outerInnerLowerModuleIndex, uint16_t& outerOuterLowerModuleIndex, unsigned int& innerSegmentIndex, unsigned int& outerSegmentIndex, unsigned int& firstMDIndex, unsigned int& secondMDIndex, unsigned int& thirdMDIndex, unsigned int& fourthMDIndex, float& zOut, float& rtOut, float& deltaPhiPos, float& dPhi, float& betaIn, float&betaOut, float& pt_beta, float& zLo, float& rtLo, float& rtHi, float& sdlCut, float& betaInCut, float& betaOutCut, float& deltaBetaCut, float& kZ) + { + bool pass = true; + bool isPS_InLo = (modulesInGPU.moduleType[innerInnerLowerModuleIndex] == SDL::PS); + bool isPS_OutLo = (modulesInGPU.moduleType[outerInnerLowerModuleIndex] == SDL::PS); + + float rt_InLo = mdsInGPU.anchorRt[firstMDIndex]; + float rt_InOut = mdsInGPU.anchorRt[secondMDIndex]; + float rt_OutLo = mdsInGPU.anchorRt[thirdMDIndex]; + + float z_InLo = mdsInGPU.anchorZ[firstMDIndex]; + float z_InOut = 
mdsInGPU.anchorZ[secondMDIndex]; + float z_OutLo = mdsInGPU.anchorZ[thirdMDIndex]; + + float alpha1GeV_OutLo = alpaka::math::asin(acc, alpaka::math::min(acc, rt_OutLo * SDL::k2Rinv1GeVf / SDL::ptCut, SDL::sinAlphaMax)); + + float rtRatio_OutLoInLo = rt_OutLo / rt_InLo; // Outer segment beginning rt divided by inner segment beginning rt; + float dzDrtScale = alpaka::math::tan(acc, alpha1GeV_OutLo) / alpha1GeV_OutLo; // The track can bend in r-z plane slightly + float zpitch_InLo = (isPS_InLo ? SDL::pixelPSZpitch : SDL::strip2SZpitch); + float zpitch_OutLo = (isPS_OutLo ? SDL::pixelPSZpitch : SDL::strip2SZpitch); + float zGeom = zpitch_InLo + zpitch_OutLo; + + zLo = z_InLo + (z_InLo - SDL::deltaZLum) * (rtRatio_OutLoInLo - 1.f) * (z_InLo > 0.f ? 1.f : dzDrtScale) - zGeom; + + // Cut #0: Preliminary (Only here in endcap case) + pass = pass and (z_InLo * z_OutLo > 0); + if(not pass) return pass; + + float dLum = SDL::copysignf_alpaka(SDL::deltaZLum, z_InLo); + bool isOutSgInnerMDPS = modulesInGPU.moduleType[outerInnerLowerModuleIndex] == SDL::PS; + float rtGeom1 = isOutSgInnerMDPS ? 
SDL::pixelPSZpitch : SDL::strip2SZpitch; + float zGeom1 = SDL::copysignf_alpaka(zGeom,z_InLo); + rtLo = rt_InLo * (1.f + (z_OutLo - z_InLo - zGeom1) / (z_InLo + zGeom1 + dLum) / dzDrtScale) - rtGeom1; //slope correction only on the lower end + zOut = z_OutLo; + rtOut = rt_OutLo; + + //Cut #1: rt condition + pass = pass and (rtOut >= rtLo); + if(not pass) return pass; + + float zInForHi = z_InLo - zGeom1 - dLum; + if(zInForHi * z_InLo < 0) + { + zInForHi = SDL::copysignf_alpaka(0.1f,z_InLo); + } + rtHi = rt_InLo * (1.f + (z_OutLo - z_InLo + zGeom1) / zInForHi) + rtGeom1; + + //Cut #2: rt condition + pass = pass and ((rt_OutLo >= rtLo) & (rt_OutLo <= rtHi)); + if(not pass) return pass; + + float rIn = alpaka::math::sqrt(acc, z_InLo * z_InLo + rt_InLo * rt_InLo); + const float drtSDIn = rt_InOut - rt_InLo; + const float dzSDIn = z_InOut - z_InLo; + const float dr3SDIn = alpaka::math::sqrt(acc, rt_InOut * rt_InOut + z_InOut * z_InOut) - alpaka::math::sqrt(acc, rt_InLo * rt_InLo + z_InLo * z_InLo); + + const float coshEta = dr3SDIn / drtSDIn; //direction estimate + const float dzOutInAbs = alpaka::math::abs(acc, z_OutLo - z_InLo); + const float multDzDr = dzOutInAbs * coshEta / (coshEta * coshEta - 1.f); + const float zGeom1_another = SDL::pixelPSZpitch; + kZ = (z_OutLo - z_InLo) / dzSDIn; + float drtErr = zGeom1_another * zGeom1_another * drtSDIn * drtSDIn / dzSDIn / dzSDIn * (1.f - 2.f * kZ + 2.f * kZ * kZ); + const float sdlThetaMulsF = 0.015f * alpaka::math::sqrt(acc, 0.1f + 0.2f * (rt_OutLo - rt_InLo) / 50.f) * alpaka::math::sqrt(acc, rIn / rt_InLo); + const float sdlMuls = sdlThetaMulsF * 3.f / SDL::ptCut * 4.f; //will need a better guess than x4? 
+ drtErr += sdlMuls * sdlMuls * multDzDr * multDzDr / 3.f * coshEta * coshEta; //sloppy: relative muls is 1/3 of total muls + drtErr = alpaka::math::sqrt(acc, drtErr); + const float drtMean = drtSDIn * dzOutInAbs / alpaka::math::abs(acc, dzSDIn); // + const float rtWindow = drtErr + rtGeom1; + const float rtLo_another = rt_InLo + drtMean / dzDrtScale - rtWindow; + const float rtHi_another = rt_InLo + drtMean + rtWindow; + + //Cut #3: rt-z pointed + pass = pass and ((kZ >= 0) & (rtOut >= rtLo) & (rtOut <= rtHi)); + if(not pass) return pass; + + const float sdlPVoff = 0.1f / rt_OutLo; + sdlCut = alpha1GeV_OutLo + alpaka::math::sqrt(acc, sdlMuls * sdlMuls + sdlPVoff*sdlPVoff); + + deltaPhiPos = SDL::deltaPhi_alpaka(acc, mdsInGPU.anchorX[secondMDIndex], mdsInGPU.anchorY[secondMDIndex], mdsInGPU.anchorX[fourthMDIndex], mdsInGPU.anchorY[fourthMDIndex]); + + //Cut #4: deltaPhiPos can be tighter + pass = pass and (alpaka::math::abs(acc, deltaPhiPos) <= sdlCut); + if(not pass) return pass; + + float midPointX = 0.5f*(mdsInGPU.anchorX[firstMDIndex] + mdsInGPU.anchorX[thirdMDIndex]); + float midPointY = 0.5f* (mdsInGPU.anchorY[firstMDIndex] + mdsInGPU.anchorY[thirdMDIndex]); + float midPointZ = 0.5f*(mdsInGPU.anchorZ[firstMDIndex] + mdsInGPU.anchorZ[thirdMDIndex]); + float diffX = mdsInGPU.anchorX[thirdMDIndex] - mdsInGPU.anchorX[firstMDIndex]; + float diffY = mdsInGPU.anchorY[thirdMDIndex] - mdsInGPU.anchorY[firstMDIndex]; + float diffZ = mdsInGPU.anchorZ[thirdMDIndex] - mdsInGPU.anchorZ[firstMDIndex]; + + dPhi = SDL::deltaPhi_alpaka(acc, midPointX, midPointY, diffX, diffY); + // Cut #5: deltaPhiChange + pass = pass and (alpaka::math::abs(acc, dPhi) <= sdlCut); + if(not pass) return pass; + + float sdIn_alpha = __H2F(segmentsInGPU.dPhiChanges[innerSegmentIndex]); + float sdIn_alpha_min = __H2F(segmentsInGPU.dPhiChangeMins[innerSegmentIndex]); + float sdIn_alpha_max = __H2F(segmentsInGPU.dPhiChangeMaxs[innerSegmentIndex]); + float sdOut_alpha = sdIn_alpha; //weird + + float 
sdOut_alphaOut = SDL::deltaPhi_alpaka(acc, mdsInGPU.anchorX[fourthMDIndex], mdsInGPU.anchorY[fourthMDIndex], mdsInGPU.anchorX[fourthMDIndex] - mdsInGPU.anchorX[thirdMDIndex], mdsInGPU.anchorY[fourthMDIndex] - mdsInGPU.anchorY[thirdMDIndex]); + + float sdOut_alphaOut_min = SDL::phi_mpi_pi_alpaka(acc, __H2F(segmentsInGPU.dPhiChangeMins[outerSegmentIndex]) - __H2F(segmentsInGPU.dPhiMins[outerSegmentIndex])); + float sdOut_alphaOut_max = SDL::phi_mpi_pi_alpaka(acc, __H2F(segmentsInGPU.dPhiChangeMaxs[outerSegmentIndex]) - __H2F(segmentsInGPU.dPhiMaxs[outerSegmentIndex])); + + float tl_axis_x = mdsInGPU.anchorX[fourthMDIndex] - mdsInGPU.anchorX[firstMDIndex]; + float tl_axis_y = mdsInGPU.anchorY[fourthMDIndex] - mdsInGPU.anchorY[firstMDIndex]; + float tl_axis_z = mdsInGPU.anchorZ[fourthMDIndex] - mdsInGPU.anchorZ[firstMDIndex]; + + betaIn = sdIn_alpha - SDL::deltaPhi_alpaka(acc, mdsInGPU.anchorX[firstMDIndex], mdsInGPU.anchorY[firstMDIndex], tl_axis_x, tl_axis_y); + + float betaInRHmin = betaIn; + float betaInRHmax = betaIn; + betaOut = -sdOut_alphaOut + SDL::deltaPhi_alpaka(acc, mdsInGPU.anchorX[fourthMDIndex], mdsInGPU.anchorY[fourthMDIndex], tl_axis_x, tl_axis_y); + + float betaOutRHmin = betaOut; + float betaOutRHmax = betaOut; + + bool isEC_secondLayer = (modulesInGPU.subdets[innerOuterLowerModuleIndex] == SDL::Endcap) and (modulesInGPU.moduleType[innerOuterLowerModuleIndex] == SDL::TwoS); + + if(isEC_secondLayer) + { + betaInRHmin = betaIn - sdIn_alpha_min + sdIn_alpha; + betaInRHmax = betaIn - sdIn_alpha_max + sdIn_alpha; + } + + betaOutRHmin = betaOut - sdOut_alphaOut_min + sdOut_alphaOut; + betaOutRHmax = betaOut - sdOut_alphaOut_max + sdOut_alphaOut; + + float swapTemp; + if(alpaka::math::abs(acc, betaOutRHmin) > alpaka::math::abs(acc, betaOutRHmax)) + { + swapTemp = betaOutRHmin; + betaOutRHmin = betaOutRHmax; + betaOutRHmax = swapTemp; + } + + if(alpaka::math::abs(acc, betaInRHmin) > alpaka::math::abs(acc, betaInRHmax)) + { + swapTemp = betaInRHmin; + 
betaInRHmin = betaInRHmax; + betaInRHmax = swapTemp; + } + + float sdIn_dr = alpaka::math::sqrt(acc, (mdsInGPU.anchorX[secondMDIndex] - mdsInGPU.anchorX[firstMDIndex]) * (mdsInGPU.anchorX[secondMDIndex] - mdsInGPU.anchorX[firstMDIndex]) + (mdsInGPU.anchorY[secondMDIndex] - mdsInGPU.anchorY[firstMDIndex]) * (mdsInGPU.anchorY[secondMDIndex] - mdsInGPU.anchorY[firstMDIndex])); + float sdIn_d = rt_InOut - rt_InLo; + + float dr = alpaka::math::sqrt(acc, tl_axis_x * tl_axis_x + tl_axis_y * tl_axis_y); + const float corrF = 1.f; + betaInCut = alpaka::math::asin(acc, alpaka::math::min(acc, (-sdIn_dr * corrF + dr) * SDL::k2Rinv1GeVf / SDL::ptCut, SDL::sinAlphaMax)) + (0.02f / sdIn_d); + + //Cut #6: first beta cut + pass = pass and (alpaka::math::abs(acc, betaInRHmin) < betaInCut); + if(not pass) return pass; + + float betaAv = 0.5f * (betaIn + betaOut); + pt_beta = dr * SDL::k2Rinv1GeVf /alpaka::math::sin(acc, betaAv); + + float lIn = 5; + float lOut = 11; + + float sdOut_dr = alpaka::math::sqrt(acc, (mdsInGPU.anchorX[fourthMDIndex] - mdsInGPU.anchorX[thirdMDIndex]) * (mdsInGPU.anchorX[fourthMDIndex] - mdsInGPU.anchorX[thirdMDIndex]) + (mdsInGPU.anchorY[fourthMDIndex] - mdsInGPU.anchorY[thirdMDIndex]) * (mdsInGPU.anchorY[fourthMDIndex] - mdsInGPU.anchorY[thirdMDIndex])); + float sdOut_d = mdsInGPU.anchorRt[fourthMDIndex] - mdsInGPU.anchorRt[thirdMDIndex]; + + SDL::runDeltaBetaIterationsT5(acc, betaIn, betaOut, betaAv, pt_beta, sdIn_dr, sdOut_dr, dr, lIn); + + const float betaInMMSF = (alpaka::math::abs(acc, betaInRHmin + betaInRHmax) > 0) ? (2.f * betaIn / alpaka::math::abs(acc, betaInRHmin + betaInRHmax)) : 0.; //mean value of min,max is the old betaIn + const float betaOutMMSF = (alpaka::math::abs(acc, betaOutRHmin + betaOutRHmax) > 0) ? 
(2.f * betaOut / alpaka::math::abs(acc, betaOutRHmin + betaOutRHmax)) : 0.; + betaInRHmin *= betaInMMSF; + betaInRHmax *= betaInMMSF; + betaOutRHmin *= betaOutMMSF; + betaOutRHmax *= betaOutMMSF; + + const float dBetaMuls = sdlThetaMulsF * 4.f / alpaka::math::min(acc, alpaka::math::abs(acc, pt_beta), SDL::pt_betaMax); //need to confirm the range-out value of 7 GeV + + const float alphaInAbsReg = alpaka::math::max(acc, alpaka::math::abs(acc, sdIn_alpha), alpaka::math::asin(acc, alpaka::math::min(acc, rt_InLo * SDL::k2Rinv1GeVf / 3.0f, SDL::sinAlphaMax))); + const float alphaOutAbsReg = alpaka::math::max(acc, alpaka::math::abs(acc, sdOut_alpha), alpaka::math::asin(acc, alpaka::math::min(acc, rt_OutLo * SDL::k2Rinv1GeVf / 3.0f, SDL::sinAlphaMax))); + const float dBetaInLum = lIn < 11 ? 0.0f : alpaka::math::abs(acc, alphaInAbsReg*SDL::deltaZLum / z_InLo); + const float dBetaOutLum = lOut < 11 ? 0.0f : alpaka::math::abs(acc, alphaOutAbsReg*SDL::deltaZLum / z_OutLo); + const float dBetaLum2 = (dBetaInLum + dBetaOutLum) * (dBetaInLum + dBetaOutLum); + const float sinDPhi =alpaka::math::sin(acc, dPhi); + + const float dBetaRIn2 = 0; // TODO-RH + float dBetaROut = 0; + if(modulesInGPU.moduleType[outerOuterLowerModuleIndex] == SDL::TwoS) + { + dBetaROut = (alpaka::math::sqrt(acc, mdsInGPU.anchorHighEdgeX[fourthMDIndex] * mdsInGPU.anchorHighEdgeX[fourthMDIndex] + mdsInGPU.anchorHighEdgeY[fourthMDIndex] * mdsInGPU.anchorHighEdgeY[fourthMDIndex]) - alpaka::math::sqrt(acc, mdsInGPU.anchorLowEdgeX[fourthMDIndex] * mdsInGPU.anchorLowEdgeX[fourthMDIndex] + mdsInGPU.anchorLowEdgeY[fourthMDIndex] * mdsInGPU.anchorLowEdgeY[fourthMDIndex])) * sinDPhi / dr; + } + + const float dBetaROut2 = dBetaROut * dBetaROut; + //FIXME: need faster version + betaOutCut = alpaka::math::asin(acc, alpaka::math::min(acc, dr*SDL::k2Rinv1GeVf / SDL::ptCut, SDL::sinAlphaMax)) + (0.02f / sdOut_d) + alpaka::math::sqrt(acc, dBetaLum2 + dBetaMuls*dBetaMuls); + + //Cut #6: The real beta cut + pass = pass and 
(alpaka::math::abs(acc, betaOut) < betaOutCut); + if(not pass) return pass; + + float pt_betaIn = dr * SDL::k2Rinv1GeVf/alpaka::math::sin(acc, betaIn); + float pt_betaOut = dr * SDL::k2Rinv1GeVf /alpaka::math::sin(acc, betaOut); + float dBetaRes = 0.02f/alpaka::math::min(acc, sdOut_d,sdIn_d); + float dBetaCut2 = (dBetaRes*dBetaRes * 2.0f + dBetaMuls * dBetaMuls + dBetaLum2 + dBetaRIn2 + dBetaROut2 + 0.25f * (alpaka::math::abs(acc, betaInRHmin - betaInRHmax) + alpaka::math::abs(acc, betaOutRHmin - betaOutRHmax)) * (alpaka::math::abs(acc, betaInRHmin - betaInRHmax) + alpaka::math::abs(acc, betaOutRHmin - betaOutRHmax))); + float dBeta = betaIn - betaOut; + deltaBetaCut = alpaka::math::sqrt(acc, dBetaCut2); + //Cut #7: Cut on dBeta + pass = pass and (dBeta * dBeta <= dBetaCut2); + + return pass; + }; + + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runQuintupletDefaultAlgoEEEE(TAcc const & acc, struct SDL::modules& modulesInGPU, struct SDL::miniDoublets& mdsInGPU, struct SDL::segments& segmentsInGPU, uint16_t& innerInnerLowerModuleIndex, uint16_t& innerOuterLowerModuleIndex, uint16_t& outerInnerLowerModuleIndex, uint16_t& outerOuterLowerModuleIndex, unsigned int& innerSegmentIndex, unsigned int& outerSegmentIndex, unsigned int& firstMDIndex, unsigned int& secondMDIndex, unsigned int& thirdMDIndex, unsigned int& fourthMDIndex, float& zOut, float& rtOut, float& deltaPhiPos, float& dPhi, float& betaIn, float&betaOut, float& pt_beta, float& zLo, float& rtLo, float& rtHi, float& sdlCut, float& betaInCut, float& betaOutCut, float& deltaBetaCut, float& kZ) + { + bool pass = true; + + bool isPS_InLo = (modulesInGPU.moduleType[innerInnerLowerModuleIndex] == SDL::PS); + bool isPS_OutLo = (modulesInGPU.moduleType[outerInnerLowerModuleIndex] == SDL::PS); + + float rt_InLo = mdsInGPU.anchorRt[firstMDIndex]; + float rt_InOut = mdsInGPU.anchorRt[secondMDIndex]; + float rt_OutLo = mdsInGPU.anchorRt[thirdMDIndex]; + + float z_InLo = mdsInGPU.anchorZ[firstMDIndex]; + float z_InOut = 
mdsInGPU.anchorZ[secondMDIndex]; + float z_OutLo = mdsInGPU.anchorZ[thirdMDIndex]; + + float alpha1GeV_OutLo = alpaka::math::asin(acc, alpaka::math::min(acc, rt_OutLo * SDL::k2Rinv1GeVf / SDL::ptCut, SDL::sinAlphaMax)); + + float rtRatio_OutLoInLo = rt_OutLo / rt_InLo; // Outer segment beginning rt divided by inner segment beginning rt; + float dzDrtScale = alpaka::math::tan(acc, alpha1GeV_OutLo) / alpha1GeV_OutLo; // The track can bend in r-z plane slightly + float zpitch_InLo = (isPS_InLo ? SDL::pixelPSZpitch : SDL::strip2SZpitch); + float zpitch_OutLo = (isPS_OutLo ? SDL::pixelPSZpitch : SDL::strip2SZpitch); + float zGeom = zpitch_InLo + zpitch_OutLo; + + zLo = z_InLo + (z_InLo - SDL::deltaZLum) * (rtRatio_OutLoInLo - 1.f) * (z_InLo > 0.f ? 1.f : dzDrtScale) - zGeom; //slope-correction only on outer end + + // Cut #0: Preliminary (Only here in endcap case) + pass = pass and ((z_InLo * z_OutLo) > 0); + if(not pass) return pass; + + float dLum = SDL::copysignf_alpaka(SDL::deltaZLum, z_InLo); + bool isOutSgInnerMDPS = modulesInGPU.moduleType[outerInnerLowerModuleIndex] == SDL::PS; + bool isInSgInnerMDPS = modulesInGPU.moduleType[innerInnerLowerModuleIndex] == SDL::PS; + + float rtGeom = (isInSgInnerMDPS and isOutSgInnerMDPS) ? 2.f * SDL::pixelPSZpitch : (isInSgInnerMDPS or isOutSgInnerMDPS) ? 
SDL::pixelPSZpitch + SDL::strip2SZpitch : 2.f * SDL::strip2SZpitch; + + float zGeom1 = SDL::copysignf_alpaka(zGeom,z_InLo); + float dz = z_OutLo - z_InLo; + rtLo = rt_InLo * (1.f + dz / (z_InLo + dLum) / dzDrtScale) - rtGeom; //slope correction only on the lower end + + zOut = z_OutLo; + rtOut = rt_OutLo; + + //Cut #1: rt condition + + rtHi = rt_InLo * (1.f + dz / (z_InLo - dLum)) + rtGeom; + + pass = pass and ((rtOut >= rtLo) & (rtOut <= rtHi)); + if(not pass) return pass; + + bool isInSgOuterMDPS = modulesInGPU.moduleType[innerOuterLowerModuleIndex] == SDL::PS; + + float drOutIn = rtOut - rt_InLo; + const float drtSDIn = rt_InOut - rt_InLo; + const float dzSDIn = z_InOut - z_InLo; + const float dr3SDIn = alpaka::math::sqrt(acc, rt_InOut * rt_InOut + z_InOut * z_InOut) - alpaka::math::sqrt(acc, rt_InLo * rt_InLo + z_InLo * z_InLo); + float coshEta = dr3SDIn / drtSDIn; //direction estimate + float dzOutInAbs = alpaka::math::abs(acc, z_OutLo - z_InLo); + float multDzDr = dzOutInAbs * coshEta / (coshEta * coshEta - 1.f); + + kZ = (z_OutLo - z_InLo) / dzSDIn; + float sdlThetaMulsF = 0.015f * alpaka::math::sqrt(acc, 0.1f + 0.2f * (rt_OutLo - rt_InLo) / 50.f); + + float sdlMuls = sdlThetaMulsF * 3.f / SDL::ptCut * 4.f; //will need a better guess than x4? 
+ + float drtErr = alpaka::math::sqrt(acc, SDL::pixelPSZpitch * SDL::pixelPSZpitch * 2.f / (dzSDIn * dzSDIn) * (dzOutInAbs * dzOutInAbs) + sdlMuls * sdlMuls * multDzDr * multDzDr / 3.f * coshEta * coshEta); + + float drtMean = drtSDIn * dzOutInAbs/alpaka::math::abs(acc, dzSDIn); + float rtWindow = drtErr + rtGeom; + float rtLo_point = rt_InLo + drtMean / dzDrtScale - rtWindow; + float rtHi_point = rt_InLo + drtMean + rtWindow; + + // Cut #3: rt-z pointed + // https://github.com/slava77/cms-tkph2-ntuple/blob/superDoubletLinked-91X-noMock/doubletAnalysis.C#L3765 + + if (isInSgInnerMDPS and isInSgOuterMDPS) // If both PS then we can point + { + pass = pass and (kZ >= 0 and rtOut >= rtLo_point and rtOut <= rtHi_point); + if(not pass) return pass; + } + + float sdlPVoff = 0.1f/rtOut; + sdlCut = alpha1GeV_OutLo + alpaka::math::sqrt(acc, sdlMuls * sdlMuls + sdlPVoff * sdlPVoff); + + deltaPhiPos = SDL::deltaPhi_alpaka(acc, mdsInGPU.anchorX[secondMDIndex], mdsInGPU.anchorY[secondMDIndex], mdsInGPU.anchorX[fourthMDIndex], mdsInGPU.anchorY[fourthMDIndex]); + + pass = pass and (alpaka::math::abs(acc, deltaPhiPos) <= sdlCut); + if(not pass) return pass; + + float midPointX = 0.5f*(mdsInGPU.anchorX[firstMDIndex] + mdsInGPU.anchorX[thirdMDIndex]); + float midPointY = 0.5f* (mdsInGPU.anchorY[firstMDIndex] + mdsInGPU.anchorY[thirdMDIndex]); + float midPointZ = 0.5f*(mdsInGPU.anchorZ[firstMDIndex] + mdsInGPU.anchorZ[thirdMDIndex]); + float diffX = mdsInGPU.anchorX[thirdMDIndex] - mdsInGPU.anchorX[firstMDIndex]; + float diffY = mdsInGPU.anchorY[thirdMDIndex] - mdsInGPU.anchorY[firstMDIndex]; + float diffZ = mdsInGPU.anchorZ[thirdMDIndex] - mdsInGPU.anchorZ[firstMDIndex]; + + dPhi = SDL::deltaPhi_alpaka(acc, midPointX, midPointY, diffX, diffY); + + // Cut #5: deltaPhiChange + pass = pass and ((alpaka::math::abs(acc, dPhi) <= sdlCut)); + if(not pass) return pass; + + float sdIn_alpha = __H2F(segmentsInGPU.dPhiChanges[innerSegmentIndex]); + float sdOut_alpha = sdIn_alpha; //weird + 
float sdOut_dPhiPos = SDL::deltaPhi_alpaka(acc, mdsInGPU.anchorX[thirdMDIndex], mdsInGPU.anchorY[thirdMDIndex], mdsInGPU.anchorX[fourthMDIndex], mdsInGPU.anchorY[fourthMDIndex]); + + float sdOut_dPhiChange = __H2F(segmentsInGPU.dPhiChanges[outerSegmentIndex]); + float sdOut_dPhiChange_min = __H2F(segmentsInGPU.dPhiChangeMins[outerSegmentIndex]); + float sdOut_dPhiChange_max = __H2F(segmentsInGPU.dPhiChangeMaxs[outerSegmentIndex]); + + float sdOut_alphaOutRHmin = SDL::phi_mpi_pi_alpaka(acc, sdOut_dPhiChange_min - sdOut_dPhiPos); + float sdOut_alphaOutRHmax = SDL::phi_mpi_pi_alpaka(acc, sdOut_dPhiChange_max - sdOut_dPhiPos); + float sdOut_alphaOut = SDL::phi_mpi_pi_alpaka(acc, sdOut_dPhiChange - sdOut_dPhiPos); + + float tl_axis_x = mdsInGPU.anchorX[fourthMDIndex] - mdsInGPU.anchorX[firstMDIndex]; + float tl_axis_y = mdsInGPU.anchorY[fourthMDIndex] - mdsInGPU.anchorY[firstMDIndex]; + float tl_axis_z = mdsInGPU.anchorZ[fourthMDIndex] - mdsInGPU.anchorZ[firstMDIndex]; + + betaIn = sdIn_alpha - SDL::deltaPhi_alpaka(acc, mdsInGPU.anchorX[firstMDIndex], mdsInGPU.anchorY[firstMDIndex], tl_axis_x, tl_axis_y); + + float sdIn_alphaRHmin = __H2F(segmentsInGPU.dPhiChangeMins[innerSegmentIndex]); + float sdIn_alphaRHmax = __H2F(segmentsInGPU.dPhiChangeMaxs[innerSegmentIndex]); + float betaInRHmin = betaIn + sdIn_alphaRHmin - sdIn_alpha; + float betaInRHmax = betaIn + sdIn_alphaRHmax - sdIn_alpha; + + betaOut = -sdOut_alphaOut + SDL::deltaPhi_alpaka(acc, mdsInGPU.anchorX[fourthMDIndex], mdsInGPU.anchorY[fourthMDIndex], tl_axis_x, tl_axis_y); + + float betaOutRHmin = betaOut - sdOut_alphaOutRHmin + sdOut_alphaOut; + float betaOutRHmax = betaOut - sdOut_alphaOutRHmax + sdOut_alphaOut; + + float swapTemp; + if(alpaka::math::abs(acc, betaOutRHmin) > alpaka::math::abs(acc, betaOutRHmax)) + { + swapTemp = betaOutRHmin; + betaOutRHmin = betaOutRHmax; + betaOutRHmax = swapTemp; + } + + if(alpaka::math::abs(acc, betaInRHmin) > alpaka::math::abs(acc, betaInRHmax)) + { + swapTemp = 
betaInRHmin; + betaInRHmin = betaInRHmax; + betaInRHmax = swapTemp; + } + float sdIn_dr = alpaka::math::sqrt(acc, (mdsInGPU.anchorX[secondMDIndex] - mdsInGPU.anchorX[firstMDIndex]) * (mdsInGPU.anchorX[secondMDIndex] - mdsInGPU.anchorX[firstMDIndex]) + (mdsInGPU.anchorY[secondMDIndex] - mdsInGPU.anchorY[firstMDIndex]) * (mdsInGPU.anchorY[secondMDIndex] - mdsInGPU.anchorY[firstMDIndex])); + float sdIn_d = rt_InOut - rt_InLo; + + float dr = alpaka::math::sqrt(acc, tl_axis_x * tl_axis_x + tl_axis_y * tl_axis_y); + const float corrF = 1.f; + betaInCut = alpaka::math::asin(acc, alpaka::math::min(acc, (-sdIn_dr * corrF + dr) * SDL::k2Rinv1GeVf / SDL::ptCut, SDL::sinAlphaMax)) + (0.02f / sdIn_d); + + //Cut #6: first beta cut + pass = pass and (alpaka::math::abs(acc, betaInRHmin) < betaInCut); + if(not pass) return pass; + + float betaAv = 0.5f * (betaIn + betaOut); + pt_beta = dr * SDL::k2Rinv1GeVf /alpaka::math::sin(acc, betaAv); + + int lIn= 11; //endcap + int lOut = 13; //endcap + + float sdOut_dr = alpaka::math::sqrt(acc, (mdsInGPU.anchorX[fourthMDIndex] - mdsInGPU.anchorX[thirdMDIndex]) * (mdsInGPU.anchorX[fourthMDIndex] - mdsInGPU.anchorX[thirdMDIndex]) + (mdsInGPU.anchorY[fourthMDIndex] - mdsInGPU.anchorY[thirdMDIndex]) * (mdsInGPU.anchorY[fourthMDIndex] - mdsInGPU.anchorY[thirdMDIndex])); + float sdOut_d = mdsInGPU.anchorRt[fourthMDIndex] - mdsInGPU.anchorRt[thirdMDIndex]; + + float diffDr = alpaka::math::abs(acc, sdIn_dr - sdOut_dr)/alpaka::math::abs(acc, sdIn_dr + sdOut_dr); + + SDL::runDeltaBetaIterationsT5(acc, betaIn, betaOut, betaAv, pt_beta, sdIn_dr, sdOut_dr, dr, lIn); + + const float betaInMMSF = (alpaka::math::abs(acc, betaInRHmin + betaInRHmax) > 0) ? (2.f * betaIn / alpaka::math::abs(acc, betaInRHmin + betaInRHmax)) : 0.; //mean value of min,max is the old betaIn + const float betaOutMMSF = (alpaka::math::abs(acc, betaOutRHmin + betaOutRHmax) > 0) ? 
(2.f * betaOut / alpaka::math::abs(acc, betaOutRHmin + betaOutRHmax)) : 0.; + betaInRHmin *= betaInMMSF; + betaInRHmax *= betaInMMSF; + betaOutRHmin *= betaOutMMSF; + betaOutRHmax *= betaOutMMSF; + + const float dBetaMuls = sdlThetaMulsF * 4.f / alpaka::math::min(acc, alpaka::math::abs(acc, pt_beta), SDL::pt_betaMax); //need to confirm the range-out value of 7 GeV + + const float alphaInAbsReg = alpaka::math::max(acc, alpaka::math::abs(acc, sdIn_alpha), alpaka::math::asin(acc, alpaka::math::min(acc, rt_InLo * SDL::k2Rinv1GeVf / 3.0f, SDL::sinAlphaMax))); + const float alphaOutAbsReg = alpaka::math::max(acc, alpaka::math::abs(acc, sdOut_alpha), alpaka::math::asin(acc, alpaka::math::min(acc, rt_OutLo * SDL::k2Rinv1GeVf / 3.0f, SDL::sinAlphaMax))); + const float dBetaInLum = lIn < 11 ? 0.0f : alpaka::math::abs(acc, alphaInAbsReg*SDL::deltaZLum / z_InLo); + const float dBetaOutLum = lOut < 11 ? 0.0f : alpaka::math::abs(acc, alphaOutAbsReg*SDL::deltaZLum / z_OutLo); + const float dBetaLum2 = (dBetaInLum + dBetaOutLum) * (dBetaInLum + dBetaOutLum); + const float sinDPhi =alpaka::math::sin(acc, dPhi); + + const float dBetaRIn2 = 0; // TODO-RH + + float dBetaROut2 = 0;//TODO-RH + //FIXME: need faster version + betaOutCut = alpaka::math::asin(acc, alpaka::math::min(acc, dr*SDL::k2Rinv1GeVf / SDL::ptCut, SDL::sinAlphaMax)) + (0.02f / sdOut_d) + alpaka::math::sqrt(acc, dBetaLum2 + dBetaMuls*dBetaMuls); + + //Cut #6: The real beta cut + pass = pass and (alpaka::math::abs(acc, betaOut) < betaOutCut); + if(not pass) return pass; + + float pt_betaIn = dr * SDL::k2Rinv1GeVf/alpaka::math::sin(acc, betaIn); + float pt_betaOut = dr * SDL::k2Rinv1GeVf /alpaka::math::sin(acc, betaOut); + float dBetaRes = 0.02f/alpaka::math::min(acc, sdOut_d,sdIn_d); + float dBetaCut2 = (dBetaRes*dBetaRes * 2.0f + dBetaMuls * dBetaMuls + dBetaLum2 + dBetaRIn2 + dBetaROut2 + 0.25f * (alpaka::math::abs(acc, betaInRHmin - betaInRHmax) + alpaka::math::abs(acc, betaOutRHmin - betaOutRHmax)) * 
(alpaka::math::abs(acc, betaInRHmin - betaInRHmax) + alpaka::math::abs(acc, betaOutRHmin - betaOutRHmax))); + float dBeta = betaIn - betaOut; + //Cut #7: Cut on dBeta + deltaBetaCut = alpaka::math::sqrt(acc, dBetaCut2); + + pass = pass and (dBeta * dBeta <= dBetaCut2); + + return pass; + }; + + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runQuintupletAlgoSelector(TAcc const & acc, struct SDL::modules& modulesInGPU, struct SDL::miniDoublets& mdsInGPU, struct SDL::segments& segmentsInGPU, uint16_t& innerInnerLowerModuleIndex, uint16_t& innerOuterLowerModuleIndex, uint16_t& outerInnerLowerModuleIndex, uint16_t& outerOuterLowerModuleIndex, unsigned int& innerSegmentIndex, unsigned int& outerSegmentIndex, unsigned int& firstMDIndex, unsigned int& secondMDIndex, unsigned int& thirdMDIndex, unsigned int& fourthMDIndex, float& zOut, float& rtOut, float& deltaPhiPos, float& deltaPhi, float& betaIn, float&betaOut, float& pt_beta, float& zLo, float& zHi, float& rtLo, float& rtHi, float& zLoPointed, float& zHiPointed, float& sdlCut, float& betaInCut, float& betaOutCut, float& deltaBetaCut, float& kZ) + { + bool pass = false; + + zLo = -999; + zHi = -999; + rtLo = -999; + rtHi = -999; + zLoPointed = -999; + zHiPointed = -999; + kZ = -999; + betaInCut = -999; + + short innerInnerLowerModuleSubdet = modulesInGPU.subdets[innerInnerLowerModuleIndex]; + short innerOuterLowerModuleSubdet = modulesInGPU.subdets[innerOuterLowerModuleIndex]; + short outerInnerLowerModuleSubdet = modulesInGPU.subdets[outerInnerLowerModuleIndex]; + short outerOuterLowerModuleSubdet = modulesInGPU.subdets[outerOuterLowerModuleIndex]; + + if(innerInnerLowerModuleSubdet == SDL::Barrel + and innerOuterLowerModuleSubdet == SDL::Barrel + and outerInnerLowerModuleSubdet == SDL::Barrel + and outerOuterLowerModuleSubdet == SDL::Barrel) + { + return runQuintupletDefaultAlgoBBBB(acc, 
modulesInGPU,mdsInGPU,segmentsInGPU,innerInnerLowerModuleIndex,innerOuterLowerModuleIndex,outerInnerLowerModuleIndex,outerOuterLowerModuleIndex,innerSegmentIndex,outerSegmentIndex,firstMDIndex, secondMDIndex, thirdMDIndex, fourthMDIndex, zOut,rtOut,deltaPhiPos,deltaPhi,betaIn,betaOut,pt_beta, zLo, zHi, zLoPointed, zHiPointed, sdlCut, betaInCut, betaOutCut, deltaBetaCut); + } + else if(innerInnerLowerModuleSubdet == SDL::Barrel + and innerOuterLowerModuleSubdet == SDL::Barrel + and outerInnerLowerModuleSubdet == SDL::Endcap + and outerOuterLowerModuleSubdet == SDL::Endcap) + { + return runQuintupletDefaultAlgoBBEE(acc, modulesInGPU,mdsInGPU,segmentsInGPU,innerInnerLowerModuleIndex,innerOuterLowerModuleIndex,outerInnerLowerModuleIndex,outerOuterLowerModuleIndex,innerSegmentIndex,outerSegmentIndex,firstMDIndex, secondMDIndex, thirdMDIndex, fourthMDIndex, zOut,rtOut,deltaPhiPos,deltaPhi,betaIn,betaOut,pt_beta, zLo, rtLo, rtHi, sdlCut, betaInCut, betaOutCut, deltaBetaCut, kZ); + } + else if(innerInnerLowerModuleSubdet == SDL::Barrel + and innerOuterLowerModuleSubdet == SDL::Barrel + and outerInnerLowerModuleSubdet == SDL::Barrel + and outerOuterLowerModuleSubdet == SDL::Endcap) + { + return runQuintupletDefaultAlgoBBBB(acc, modulesInGPU,mdsInGPU,segmentsInGPU,innerInnerLowerModuleIndex,innerOuterLowerModuleIndex,outerInnerLowerModuleIndex,outerOuterLowerModuleIndex,innerSegmentIndex,outerSegmentIndex,firstMDIndex, secondMDIndex, thirdMDIndex, fourthMDIndex, zOut,rtOut,deltaPhiPos,deltaPhi,betaIn,betaOut,pt_beta,zLo, zHi, zLoPointed, zHiPointed, sdlCut, betaInCut, betaOutCut, deltaBetaCut); + } + else if(innerInnerLowerModuleSubdet == SDL::Barrel + and innerOuterLowerModuleSubdet == SDL::Endcap + and outerInnerLowerModuleSubdet == SDL::Endcap + and outerOuterLowerModuleSubdet == SDL::Endcap) + { + return runQuintupletDefaultAlgoBBEE(acc, 
modulesInGPU,mdsInGPU,segmentsInGPU,innerInnerLowerModuleIndex,innerOuterLowerModuleIndex,outerInnerLowerModuleIndex,outerOuterLowerModuleIndex,innerSegmentIndex,outerSegmentIndex, firstMDIndex, secondMDIndex, thirdMDIndex, fourthMDIndex, zOut,rtOut,deltaPhiPos,deltaPhi,betaIn,betaOut,pt_beta, zLo, rtLo, rtHi, sdlCut, betaInCut, betaOutCut, deltaBetaCut, kZ); + } + else if(innerInnerLowerModuleSubdet == SDL::Endcap + and innerOuterLowerModuleSubdet == SDL::Endcap + and outerInnerLowerModuleSubdet == SDL::Endcap + and outerOuterLowerModuleSubdet == SDL::Endcap) + { + return runQuintupletDefaultAlgoEEEE(acc, modulesInGPU,mdsInGPU,segmentsInGPU,innerInnerLowerModuleIndex,innerOuterLowerModuleIndex,outerInnerLowerModuleIndex,outerOuterLowerModuleIndex,innerSegmentIndex,outerSegmentIndex, firstMDIndex, secondMDIndex, thirdMDIndex, fourthMDIndex, zOut,rtOut,deltaPhiPos,deltaPhi,betaIn,betaOut,pt_beta, zLo, rtLo, rtHi, sdlCut, betaInCut, betaOutCut, deltaBetaCut, kZ); + } + + return pass; + }; + + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runQuintupletDefaultAlgo(TAcc const & acc, struct SDL::modules& modulesInGPU, struct SDL::miniDoublets& mdsInGPU, struct SDL::segments& segmentsInGPU, struct SDL::triplets& tripletsInGPU, uint16_t& lowerModuleIndex1, uint16_t& lowerModuleIndex2, uint16_t& lowerModuleIndex3, uint16_t& lowerModuleIndex4, uint16_t& lowerModuleIndex5, unsigned int& innerTripletIndex, unsigned int& outerTripletIndex, float& innerRadius, float& outerRadius, float& bridgeRadius, float& regressionG, float& regressionF, float& regressionRadius, float& rzChiSquared, float& chiSquared, float& nonAnchorChiSquared) + { + bool pass = true; + unsigned int firstSegmentIndex = tripletsInGPU.segmentIndices[2 * innerTripletIndex]; + unsigned int secondSegmentIndex = tripletsInGPU.segmentIndices[2 * innerTripletIndex + 1]; + unsigned int thirdSegmentIndex = tripletsInGPU.segmentIndices[2 * outerTripletIndex]; + unsigned int fourthSegmentIndex = 
tripletsInGPU.segmentIndices[2 * outerTripletIndex + 1]; + + unsigned int innerOuterOuterMiniDoubletIndex = segmentsInGPU.mdIndices[2 * secondSegmentIndex + 1]; //inner triplet outer segment outer MD index + unsigned int outerInnerInnerMiniDoubletIndex = segmentsInGPU.mdIndices[2 * thirdSegmentIndex]; //outer triplet inner segmnet inner MD index + + //this cut reduces the number of candidates by a factor of 3, i.e., 2 out of 3 warps can end right here! + if (innerOuterOuterMiniDoubletIndex != outerInnerInnerMiniDoubletIndex) return false; + + //apply T4 criteria between segments 1 and 3 + float zOut, rtOut, deltaPhiPos, deltaPhi, betaIn, betaOut, pt_beta; //temp stuff + float zLo, zHi, rtLo, rtHi, zLoPointed, zHiPointed, sdlCut, betaInCut, betaOutCut, deltaBetaCut, kZ; + + unsigned int firstMDIndex = segmentsInGPU.mdIndices[2 * firstSegmentIndex]; + unsigned int secondMDIndex = segmentsInGPU.mdIndices[2 * secondSegmentIndex]; + unsigned int thirdMDIndex = segmentsInGPU.mdIndices[2 * secondSegmentIndex + 1]; + unsigned int fourthMDIndex = segmentsInGPU.mdIndices[2 * thirdSegmentIndex + 1]; + unsigned int fifthMDIndex = segmentsInGPU.mdIndices[2 * fourthSegmentIndex + 1]; + + pass = pass and runQuintupletAlgoSelector(acc, modulesInGPU, mdsInGPU, segmentsInGPU, lowerModuleIndex1, lowerModuleIndex2, lowerModuleIndex3, lowerModuleIndex4, firstSegmentIndex, thirdSegmentIndex, firstMDIndex, secondMDIndex, thirdMDIndex, fourthMDIndex, zOut, rtOut, deltaPhiPos, deltaPhi, betaIn, betaOut, pt_beta, zLo, zHi, rtLo, rtHi, zLoPointed, zHiPointed, sdlCut, betaInCut, betaOutCut, deltaBetaCut, kZ); + if(not pass) return pass; + + pass = pass and runQuintupletAlgoSelector(acc, modulesInGPU, mdsInGPU, segmentsInGPU, lowerModuleIndex1, lowerModuleIndex2, lowerModuleIndex4, lowerModuleIndex5, firstSegmentIndex, fourthSegmentIndex, firstMDIndex, secondMDIndex, fourthMDIndex, fifthMDIndex, zOut, rtOut, deltaPhiPos, deltaPhi, betaIn, betaOut, pt_beta, zLo, zHi, rtLo, rtHi, zLoPointed, 
zHiPointed, sdlCut, betaInCut, betaOutCut, deltaBetaCut, kZ); + if(not pass) return pass; + + pass = pass and passT5RZConstraint(acc, modulesInGPU, mdsInGPU, firstMDIndex, secondMDIndex, thirdMDIndex, fourthMDIndex, fifthMDIndex, lowerModuleIndex1, lowerModuleIndex2, lowerModuleIndex3, lowerModuleIndex4, lowerModuleIndex5); + if(not pass) return pass; + + float x1 = mdsInGPU.anchorX[firstMDIndex]; + float x2 = mdsInGPU.anchorX[secondMDIndex]; + float x3 = mdsInGPU.anchorX[thirdMDIndex]; + float x4 = mdsInGPU.anchorX[fourthMDIndex]; + float x5 = mdsInGPU.anchorX[fifthMDIndex]; + + float y1 = mdsInGPU.anchorY[firstMDIndex]; + float y2 = mdsInGPU.anchorY[secondMDIndex]; + float y3 = mdsInGPU.anchorY[thirdMDIndex]; + float y4 = mdsInGPU.anchorY[fourthMDIndex]; + float y5 = mdsInGPU.anchorY[fifthMDIndex]; + + //construct the arrays + float x1Vec[] = {x1, x1, x1}; + float y1Vec[] = {y1, y1, y1}; + float x2Vec[] = {x2, x2, x2}; + float y2Vec[] = {y2, y2, y2}; + float x3Vec[] = {x3, x3, x3}; + float y3Vec[] = {y3, y3, y3}; + + if(modulesInGPU.subdets[lowerModuleIndex1] == SDL::Endcap and modulesInGPU.moduleType[lowerModuleIndex1] == SDL::TwoS) + { + x1Vec[1] = mdsInGPU.anchorLowEdgeX[firstMDIndex]; + x1Vec[2] = mdsInGPU.anchorHighEdgeX[firstMDIndex]; + + y1Vec[1] = mdsInGPU.anchorLowEdgeY[firstMDIndex]; + y1Vec[2] = mdsInGPU.anchorHighEdgeY[firstMDIndex]; + } + if(modulesInGPU.subdets[lowerModuleIndex2] == SDL::Endcap and modulesInGPU.moduleType[lowerModuleIndex2] == SDL::TwoS) + { + x2Vec[1] = mdsInGPU.anchorLowEdgeX[secondMDIndex]; + x2Vec[2] = mdsInGPU.anchorHighEdgeX[secondMDIndex]; + + y2Vec[1] = mdsInGPU.anchorLowEdgeY[secondMDIndex]; + y2Vec[2] = mdsInGPU.anchorHighEdgeY[secondMDIndex]; + } + if(modulesInGPU.subdets[lowerModuleIndex3] == SDL::Endcap and modulesInGPU.moduleType[lowerModuleIndex3] == SDL::TwoS) + { + x3Vec[1] = mdsInGPU.anchorLowEdgeX[thirdMDIndex]; + x3Vec[2] = mdsInGPU.anchorHighEdgeX[thirdMDIndex]; + + y3Vec[1] = 
mdsInGPU.anchorLowEdgeY[thirdMDIndex]; + y3Vec[2] = mdsInGPU.anchorHighEdgeY[thirdMDIndex]; + } + + float innerRadiusMin2S, innerRadiusMax2S; + computeErrorInRadius(acc, x1Vec, y1Vec, x2Vec, y2Vec, x3Vec, y3Vec, innerRadiusMin2S, innerRadiusMax2S); + + for (int i=0; i<3; i++) + { + x1Vec[i] = x4; + y1Vec[i] = y4; + } + if(modulesInGPU.subdets[lowerModuleIndex4] == SDL::Endcap and modulesInGPU.moduleType[lowerModuleIndex4] == SDL::TwoS) + { + x1Vec[1] = mdsInGPU.anchorLowEdgeX[fourthMDIndex]; + x1Vec[2] = mdsInGPU.anchorHighEdgeX[fourthMDIndex]; + + y1Vec[1] = mdsInGPU.anchorLowEdgeY[fourthMDIndex]; + y1Vec[2] = mdsInGPU.anchorHighEdgeY[fourthMDIndex]; + } + + float bridgeRadiusMin2S, bridgeRadiusMax2S; + computeErrorInRadius(acc, x2Vec, y2Vec, x3Vec, y3Vec, x1Vec, y1Vec, bridgeRadiusMin2S, bridgeRadiusMax2S); + + for(int i=0; i<3; i++) + { + x2Vec[i] = x5; + y2Vec[i] = y5; + } + if(modulesInGPU.subdets[lowerModuleIndex5] == SDL::Endcap and modulesInGPU.moduleType[lowerModuleIndex5] == SDL::TwoS) + { + x2Vec[1] = mdsInGPU.anchorLowEdgeX[fifthMDIndex]; + x2Vec[2] = mdsInGPU.anchorHighEdgeX[fifthMDIndex]; + + y2Vec[1] = mdsInGPU.anchorLowEdgeY[fifthMDIndex]; + y2Vec[2] = mdsInGPU.anchorHighEdgeY[fifthMDIndex]; + } + + float outerRadiusMin2S, outerRadiusMax2S; + computeErrorInRadius(acc, x3Vec, y3Vec, x1Vec, y1Vec, x2Vec, y2Vec, outerRadiusMin2S, outerRadiusMax2S); + + float g, f; + innerRadius = computeRadiusFromThreeAnchorHits(acc, x1, y1, x2, y2, x3, y3, g, f); + outerRadius = computeRadiusFromThreeAnchorHits(acc, x3, y3, x4, y4, x5, y5, g, f); + bridgeRadius = computeRadiusFromThreeAnchorHits(acc, x2, y2, x3, y3, x4, y4, g, f); + + pass = pass & (innerRadius >= 0.95f * ptCut/(2.f * k2Rinv1GeVf)); + + float innerInvRadiusMin, innerInvRadiusMax, bridgeInvRadiusMin, bridgeInvRadiusMax, outerInvRadiusMin, outerInvRadiusMax; + + //split by category + bool tempPass; + if(modulesInGPU.subdets[lowerModuleIndex1] == SDL::Barrel and modulesInGPU.subdets[lowerModuleIndex2] == 
SDL::Barrel and modulesInGPU.subdets[lowerModuleIndex3] == SDL::Barrel and modulesInGPU.subdets[lowerModuleIndex4] == SDL::Barrel and modulesInGPU.subdets[lowerModuleIndex5] == SDL::Barrel) + { + tempPass = matchRadiiBBBBB(acc, innerRadius, bridgeRadius, outerRadius, innerInvRadiusMin, innerInvRadiusMax, bridgeInvRadiusMin, bridgeInvRadiusMax, outerInvRadiusMin, outerInvRadiusMax); + } + else if(modulesInGPU.subdets[lowerModuleIndex1] == SDL::Barrel and modulesInGPU.subdets[lowerModuleIndex2] == SDL::Barrel and modulesInGPU.subdets[lowerModuleIndex3] == SDL::Barrel and modulesInGPU.subdets[lowerModuleIndex4] == SDL::Barrel and modulesInGPU.subdets[lowerModuleIndex5] == SDL::Endcap) + { + tempPass = matchRadiiBBBBE(acc, innerRadius, bridgeRadius, outerRadius, innerRadiusMin2S, innerRadiusMax2S, bridgeRadiusMin2S, bridgeRadiusMax2S, outerRadiusMin2S, outerRadiusMax2S, innerInvRadiusMin, innerInvRadiusMax, bridgeInvRadiusMin, bridgeInvRadiusMax, outerInvRadiusMin, outerInvRadiusMax); + } + else if(modulesInGPU.subdets[lowerModuleIndex1] == SDL::Barrel and modulesInGPU.subdets[lowerModuleIndex2] == SDL::Barrel and modulesInGPU.subdets[lowerModuleIndex3] == SDL::Barrel and modulesInGPU.subdets[lowerModuleIndex4] == SDL::Endcap and modulesInGPU.subdets[lowerModuleIndex5] == SDL::Endcap) + { + if(modulesInGPU.layers[lowerModuleIndex1] == 1) + { + tempPass = matchRadiiBBBEE12378(acc, innerRadius, bridgeRadius, outerRadius,innerRadiusMin2S, innerRadiusMax2S, bridgeRadiusMin2S, bridgeRadiusMax2S, outerRadiusMin2S, outerRadiusMax2S, innerInvRadiusMin, innerInvRadiusMax, bridgeInvRadiusMin, bridgeInvRadiusMax, outerInvRadiusMin, outerInvRadiusMax); + } + else if(modulesInGPU.layers[lowerModuleIndex1] == 2) + { + tempPass = matchRadiiBBBEE23478(acc, innerRadius, bridgeRadius, outerRadius,innerRadiusMin2S, innerRadiusMax2S, bridgeRadiusMin2S, bridgeRadiusMax2S, outerRadiusMin2S, outerRadiusMax2S, innerInvRadiusMin, innerInvRadiusMax, bridgeInvRadiusMin, bridgeInvRadiusMax, 
outerInvRadiusMin, outerInvRadiusMax); + } + else + { + tempPass = matchRadiiBBBEE34578(acc, innerRadius, bridgeRadius, outerRadius,innerRadiusMin2S, innerRadiusMax2S, bridgeRadiusMin2S, bridgeRadiusMax2S, outerRadiusMin2S, outerRadiusMax2S, innerInvRadiusMin, innerInvRadiusMax, bridgeInvRadiusMin, bridgeInvRadiusMax, outerInvRadiusMin, outerInvRadiusMax); + } + } + + else if(modulesInGPU.subdets[lowerModuleIndex1] == SDL::Barrel and modulesInGPU.subdets[lowerModuleIndex2] == SDL::Barrel and modulesInGPU.subdets[lowerModuleIndex3] == SDL::Endcap and modulesInGPU.subdets[lowerModuleIndex4] == SDL::Endcap and modulesInGPU.subdets[lowerModuleIndex5] == SDL::Endcap) + { + tempPass = matchRadiiBBEEE(acc, innerRadius, bridgeRadius, outerRadius, innerRadiusMin2S, innerRadiusMax2S, bridgeRadiusMin2S, bridgeRadiusMax2S, outerRadiusMin2S, outerRadiusMax2S, innerInvRadiusMin, innerInvRadiusMax, bridgeInvRadiusMin, bridgeInvRadiusMax, outerInvRadiusMin, outerInvRadiusMax); + } + else if(modulesInGPU.subdets[lowerModuleIndex1] == SDL::Barrel and modulesInGPU.subdets[lowerModuleIndex2] == SDL::Endcap and modulesInGPU.subdets[lowerModuleIndex3] == SDL::Endcap and modulesInGPU.subdets[lowerModuleIndex4] == SDL::Endcap and modulesInGPU.subdets[lowerModuleIndex5] == SDL::Endcap) + { + tempPass = matchRadiiBEEEE(acc, innerRadius, bridgeRadius, outerRadius, innerRadiusMin2S, innerRadiusMax2S, bridgeRadiusMin2S, bridgeRadiusMax2S, outerRadiusMin2S, outerRadiusMax2S, innerInvRadiusMin, innerInvRadiusMax, bridgeInvRadiusMin, bridgeInvRadiusMax, outerInvRadiusMin, outerInvRadiusMax); + } + else + { + tempPass = matchRadiiEEEEE(acc, innerRadius, bridgeRadius, outerRadius, innerRadiusMin2S, innerRadiusMax2S, bridgeRadiusMin2S, bridgeRadiusMax2S, outerRadiusMin2S, outerRadiusMax2S,innerInvRadiusMin, innerInvRadiusMax, bridgeInvRadiusMin, bridgeInvRadiusMax, outerInvRadiusMin, outerInvRadiusMax); + } + + //compute regression radius right here - this computation is expensive!!! 
+ pass = pass and tempPass; + if(not pass) return pass; + + float xVec[] = {x1, x2, x3, x4, x5}; + float yVec[] = {y1, y2, y3, y4, y5}; + float sigmas[5], delta1[5], delta2[5], slopes[5]; + bool isFlat[5]; + //5 categories for sigmas + const uint16_t lowerModuleIndices[] = {lowerModuleIndex1, lowerModuleIndex2, lowerModuleIndex3, lowerModuleIndex4, lowerModuleIndex5}; + + computeSigmasForRegression(acc, modulesInGPU, lowerModuleIndices, delta1, delta2, slopes, isFlat); + regressionRadius = computeRadiusUsingRegression(acc, 5,xVec, yVec, delta1, delta2, slopes, isFlat, regressionG, regressionF, sigmas, chiSquared); + + //extra chi squared cuts! + if(regressionRadius < 5.0f/(2.f * k2Rinv1GeVf)) + { + pass = pass and passChiSquaredConstraint(modulesInGPU, lowerModuleIndex1, lowerModuleIndex2, lowerModuleIndex3, lowerModuleIndex4, lowerModuleIndex5, chiSquared); + if(not pass) return pass; + } + + //compute the other chisquared + //non anchor is always shifted for tilted and endcap! + float nonAnchorDelta1[5], nonAnchorDelta2[5], nonAnchorSlopes[5]; + float nonAnchorxs[] = { mdsInGPU.outerX[firstMDIndex], mdsInGPU.outerX[secondMDIndex], mdsInGPU.outerX[thirdMDIndex], mdsInGPU.outerX[fourthMDIndex], mdsInGPU.outerX[fifthMDIndex]}; + float nonAnchorys[] = { mdsInGPU.outerY[firstMDIndex], mdsInGPU.outerY[secondMDIndex], mdsInGPU.outerY[thirdMDIndex], mdsInGPU.outerY[fourthMDIndex], mdsInGPU.outerY[fifthMDIndex]}; + + computeSigmasForRegression(acc, modulesInGPU, lowerModuleIndices, nonAnchorDelta1, nonAnchorDelta2, nonAnchorSlopes, isFlat, 5, false); + nonAnchorChiSquared = computeChiSquared(acc, 5, nonAnchorxs, nonAnchorys, nonAnchorDelta1, nonAnchorDelta2, nonAnchorSlopes, isFlat, regressionG, regressionF, regressionRadius); + return pass; + }; + + struct createQuintupletsInGPUv2 + { + template + ALPAKA_FN_ACC void operator()( + TAcc const & acc, + struct SDL::modules& modulesInGPU, + struct SDL::miniDoublets& mdsInGPU, + struct SDL::segments& segmentsInGPU, + struct 
SDL::triplets& tripletsInGPU, + struct SDL::quintuplets& quintupletsInGPU, + struct SDL::objectRanges& rangesInGPU, + uint16_t nEligibleT5Modules) const + { + using Dim = alpaka::Dim; + using Idx = alpaka::Idx; + using Vec = alpaka::Vec; + + Vec const globalThreadIdx = alpaka::getIdx(acc); + Vec const gridThreadExtent = alpaka::getWorkDiv(acc); + + for (int iter = globalThreadIdx[0]; iter < nEligibleT5Modules; iter += gridThreadExtent[0]) + { + uint16_t lowerModule1 = rangesInGPU.indicesOfEligibleT5Modules[iter]; + unsigned int nInnerTriplets = tripletsInGPU.nTriplets[lowerModule1]; + for( unsigned int innerTripletArrayIndex = globalThreadIdx[1]; innerTripletArrayIndex < nInnerTriplets; innerTripletArrayIndex += gridThreadExtent[1]) + { + unsigned int innerTripletIndex = rangesInGPU.tripletModuleIndices[lowerModule1] + innerTripletArrayIndex; + uint16_t lowerModule2 = tripletsInGPU.lowerModuleIndices[3 * innerTripletIndex + 1]; + uint16_t lowerModule3 = tripletsInGPU.lowerModuleIndices[3 * innerTripletIndex + 2]; + unsigned int nOuterTriplets = tripletsInGPU.nTriplets[lowerModule3]; + for (int outerTripletArrayIndex = globalThreadIdx[2]; outerTripletArrayIndex < nOuterTriplets; outerTripletArrayIndex += gridThreadExtent[2]) + { + unsigned int outerTripletIndex = rangesInGPU.tripletModuleIndices[lowerModule3] + outerTripletArrayIndex; + uint16_t lowerModule4 = tripletsInGPU.lowerModuleIndices[3 * outerTripletIndex + 1]; + uint16_t lowerModule5 = tripletsInGPU.lowerModuleIndices[3 * outerTripletIndex + 2]; + + float innerRadius, outerRadius, bridgeRadius, regressionG, regressionF, regressionRadius, rzChiSquared, chiSquared, nonAnchorChiSquared; //required for making distributions + + bool success = runQuintupletDefaultAlgo(acc, modulesInGPU, mdsInGPU, segmentsInGPU, tripletsInGPU, lowerModule1, lowerModule2, lowerModule3, lowerModule4, lowerModule5, innerTripletIndex, outerTripletIndex, innerRadius, outerRadius, bridgeRadius, regressionG, regressionF, 
regressionRadius, rzChiSquared, chiSquared, nonAnchorChiSquared); + + if(success) + { + short layer2_adjustment; + int layer = modulesInGPU.layers[lowerModule1]; + if(layer == 1) + { + layer2_adjustment = 1; + } // get upper segment to be in second layer + else if(layer == 2) + { + layer2_adjustment = 0; + } // get lower segment to be in second layer + else + { + return; + } // ignore anything else TODO: move this to start, before object is made (faster) + int totOccupancyQuintuplets = alpaka::atomicOp(acc, &quintupletsInGPU.totOccupancyQuintuplets[lowerModule1], 1); + if(totOccupancyQuintuplets >= (rangesInGPU.quintupletModuleIndices[lowerModule1 + 1] - rangesInGPU.quintupletModuleIndices[lowerModule1])) + { +#ifdef Warnings + printf("Quintuplet excess alert! Module index = %d\n", lowerModule1); #endif + } + else + { + int quintupletModuleIndex = alpaka::atomicOp(acc, &quintupletsInGPU.nQuintuplets[lowerModule1], 1); + //this if statement should never get executed! + if(rangesInGPU.quintupletModuleIndices[lowerModule1] == -1) + { + printf("Quintuplets : no memory for module at module index = %d\n", lowerModule1); + } + else + { + unsigned int quintupletIndex = rangesInGPU.quintupletModuleIndices[lowerModule1] + quintupletModuleIndex; + float phi = mdsInGPU.anchorPhi[segmentsInGPU.mdIndices[2*tripletsInGPU.segmentIndices[2*innerTripletIndex+layer2_adjustment]]]; + float eta = mdsInGPU.anchorEta[segmentsInGPU.mdIndices[2*tripletsInGPU.segmentIndices[2*innerTripletIndex+layer2_adjustment]]]; + float pt = (innerRadius+outerRadius)*3.8f*1.602f/(2*100*5.39f); + float scores = chiSquared + nonAnchorChiSquared; + addQuintupletToMemory(tripletsInGPU, quintupletsInGPU, innerTripletIndex, outerTripletIndex, lowerModule1, lowerModule2, lowerModule3, lowerModule4, lowerModule5, innerRadius, bridgeRadius, outerRadius, regressionG, regressionF, regressionRadius, rzChiSquared, chiSquared, nonAnchorChiSquared, pt,eta,phi,scores,layer,quintupletIndex); + + 
tripletsInGPU.partOfT5[quintupletsInGPU.tripletIndices[2 * quintupletIndex]] = true; + tripletsInGPU.partOfT5[quintupletsInGPU.tripletIndices[2 * quintupletIndex + 1]] = true; + } + } + } + } + } + } + } + }; + + struct createEligibleModulesListForQuintupletsGPU + { + template + ALPAKA_FN_ACC void operator()( + TAcc const & acc, + struct modules& modulesInGPU, + struct triplets& tripletsInGPU, + struct objectRanges& rangesInGPU, + unsigned int* device_nTotalQuintuplets) const + { + using Dim = alpaka::Dim; + using Idx = alpaka::Idx; + using Vec = alpaka::Vec; + + Vec const globalThreadIdx = alpaka::getIdx(acc); + Vec const gridThreadExtent = alpaka::getWorkDiv(acc); + + // Initialize variables in shared memory and set to 0 + int& nEligibleT5Modulesx = alpaka::declareSharedVar(acc); + int& nTotalQuintupletsx = alpaka::declareSharedVar(acc); + nTotalQuintupletsx = 0; nEligibleT5Modulesx = 0; + alpaka::syncBlockThreads(acc); + + unsigned int category_number, eta_number; + for(int i = globalThreadIdx[2]; i < *modulesInGPU.nLowerModules; i += gridThreadExtent[2]) + { + // Condition for a quintuple to exist for a module + // TCs don't exist for layers 5 and 6 barrel, and layers 2,3,4,5 endcap + unsigned int layers = modulesInGPU.layers[i]; + unsigned int subdets = modulesInGPU.subdets[i]; + unsigned int rings = modulesInGPU.rings[i]; + float eta = modulesInGPU.eta[i]; + float abs_eta = alpaka::math::abs(acc, eta); + int occupancy = 0; + + if (tripletsInGPU.nTriplets[i] == 0) continue; + if (subdets == SDL::Barrel and layers >= 3) continue; + if (subdets == SDL::Endcap and layers > 1) continue; + + int nEligibleT5Modules = alpaka::atomicOp(acc, &nEligibleT5Modulesx, 1); + + if (layers<=3 && subdets==5) category_number = 0; + else if (layers>=4 && subdets==5) category_number = 1; + else if (layers<=2 && subdets==4 && rings>=11) category_number = 2; + else if (layers>=3 && subdets==4 && rings>=8) category_number = 2; + else if (layers<=2 && subdets==4 && rings<=10) 
category_number = 3; + else if (layers>=3 && subdets==4 && rings<=7) category_number = 3; + + if (abs_eta<0.75) eta_number=0; + else if (abs_eta>0.75 && abs_eta<1.5) eta_number=1; + else if (abs_eta>1.5 && abs_eta<2.25) eta_number=2; + else if (abs_eta>2.25 && abs_eta<3) eta_number=3; + + if (category_number == 0 && eta_number == 0) occupancy = 336; + else if (category_number == 0 && eta_number == 1) occupancy = 414; + else if (category_number == 0 && eta_number == 2) occupancy = 231; + else if (category_number == 0 && eta_number == 3) occupancy = 146; + else if (category_number == 3 && eta_number == 1) occupancy = 0; + else if (category_number == 3 && eta_number == 2) occupancy = 191; + else if (category_number == 3 && eta_number == 3) occupancy = 106; + + int nTotQ = alpaka::atomicOp(acc, &nTotalQuintupletsx, occupancy); + rangesInGPU.quintupletModuleIndices[i] = nTotQ; + rangesInGPU.indicesOfEligibleT5Modules[nEligibleT5Modules] = i; + } + + // Wait for all threads to finish before reporting final values + alpaka::syncBlockThreads(acc); + if(globalThreadIdx[2] == 0) + { + *rangesInGPU.nEligibleT5Modules = static_cast(nEligibleT5Modulesx); + *device_nTotalQuintuplets = static_cast(nTotalQuintupletsx); + } + } + }; +} +#endif \ No newline at end of file diff --git a/code/core/write_sdl_ntuple.cc b/code/core/write_sdl_ntuple.cc index 4258a597..824dd9db 100644 --- a/code/core/write_sdl_ntuple.cc +++ b/code/core/write_sdl_ntuple.cc @@ -402,7 +402,7 @@ void setQuintupletOutputBranches(SDL::Event* event) for (unsigned int lowerModuleIdx = 0; lowerModuleIdx < *(modulesInGPU.nLowerModules); ++lowerModuleIdx) { - unsigned int nQuintuplets = quintupletsInGPU.nQuintuplets[lowerModuleIdx]; + int nQuintuplets = quintupletsInGPU.nQuintuplets[lowerModuleIdx]; for (unsigned int idx = 0; idx < nQuintuplets; idx++) { unsigned int quintupletIndex = rangesInGPU.quintupletModuleIndices[lowerModuleIdx] + idx;