diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/check_sa.cc b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/check_sa.cc index 67f863c1d0..fb1fff1598 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/check_sa.cc +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/check_sa.cc @@ -652,7 +652,7 @@ main( int argc, char** argv ) // *** STOP THE NEW OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** wv3acount += timermap.stop(); // calc only - wavecount += wv3acount; // calc plus copy + wavecount += wv3acount; // calc plus copy #ifdef MGONGPUCPP_GPUIMPL if( !bridge ) @@ -736,7 +736,7 @@ main( int argc, char** argv ) wavetimes[iiter] = wavecounts[iiter] * secPerCount; wv3atimes[iiter] = wv3acounts[iiter] * secPerCount; } - + // === STEP 8 ANALYSIS // --- 8a Analysis: compute stats after the loop const std::string statKey = "8a CompStat"; diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/counters.cc b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/counters.cc index d4805d6556..01dacc3269 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/counters.cc +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/counters.cc @@ -59,11 +59,13 @@ extern "C" if( getenv( "CUDACPP_RUNTIME_USECHRONOTIMERS" ) ) usechronotimers = true; for( int icounter = 0; icounter < NCOUNTERSMAX + 3; icounter++ ) { - array_tags[icounter] = ""; // ensure that this is initialized to "" + array_tags[icounter] = ""; // ensure that this is initialized to "" array_istesttimer[icounter] = false; // ensure that this is initialized to false } - if( usechronotimers ) program_chronotimer.start(); - else program_rdtsctimer.start(); + if( usechronotimers ) + program_chronotimer.start(); + else + program_rdtsctimer.start(); return; } @@ -112,8 +114,10 @@ extern "C" throw std::runtime_error( sstr.str() ); } array_counters[icounter] += *pnevt; - if( usechronotimers ) array_chronotimers[icounter].start(); - else array_rdtsctimers[icounter].start(); + if( usechronotimers ) + array_chronotimers[icounter].start(); + else + array_rdtsctimers[icounter].start(); return; } @@ -129,8 +133,10 @@ extern "C" sstr << "ERROR! counter #" << icounter << " does not exist"; throw std::runtime_error( sstr.str() ); } - if( usechronotimers ) array_chronotimers[icounter].stop(); - else array_rdtsctimers[icounter].stop(); + if( usechronotimers ) + array_chronotimers[icounter].stop(); + else + array_rdtsctimers[icounter].stop(); return; } @@ -138,11 +144,15 @@ extern "C" { using namespace counters; // Dump program counters - if( usechronotimers ) program_chronotimer.stop(); - else program_rdtsctimer.stop(); + if( usechronotimers ) + program_chronotimer.stop(); + else + program_rdtsctimer.stop(); float program_totaltime = ( usechronotimers ? program_chronotimer.getTotalDurationSeconds() : program_rdtsctimer.getTotalDurationSeconds() ); - if( usechronotimers ) printf( " [COUNTERS] *** USING STD::CHRONO TIMERS ***\n" ); - else printf( " [COUNTERS] *** USING RDTSC-BASED TIMERS ***\n" ); + if( usechronotimers ) + printf( " [COUNTERS] *** USING STD::CHRONO TIMERS ***\n" ); + else + printf( " [COUNTERS] *** USING RDTSC-BASED TIMERS ***\n" ); printf( " [COUNTERS] PROGRAM TOTAL : %9.4fs\n", program_totaltime ); if( disablecalltimers ) return; // Extract time duration from all timers diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/timer.h b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/timer.h index b65849b9c4..8132335701 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/timer.h +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/timer.h @@ -21,13 +21,13 @@ namespace mgOnGpu { // --------------------------------------------------------------------------- - + // ChronoTimer: default ("old") timers based on std::chrono clocks // With respect to the original Timer class, this uses a new implementation with nanosecond counts // With respect to the original Timer class, this also uses a new API with explicit start/stop // Template argument T can be any of high_resolution_clock, steady_clock, system_clock // See https://www.modernescpp.com/index.php/the-three-clocks - // See https://codereview.stackexchange.com/questions/196245/extremely-simple-timer-class-in-c + // See https://codereview.stackexchange.com/questions/196245/extremely-simple-timer-class-in-c template class ChronoTimer { @@ -50,8 +50,7 @@ namespace mgOnGpu }; template - inline - ChronoTimer::ChronoTimer() + inline ChronoTimer::ChronoTimer() : m_totalDuration() , m_started( false ) , m_startTime() @@ -62,8 +61,7 @@ namespace mgOnGpu } template - inline - void + inline void ChronoTimer::start() { assert( !m_started ); @@ -72,8 +70,7 @@ namespace mgOnGpu } template - inline - void + inline void ChronoTimer::stop() { assert( m_started ); @@ -82,32 +79,29 @@ namespace mgOnGpu } template - inline - uint64_t + inline uint64_t ChronoTimer::getCountsSinceStart() const { return getDurationSinceStart().count(); } - + template inline - typename ChronoTimer::DURATION - ChronoTimer::getDurationSinceStart() const + typename ChronoTimer::DURATION + ChronoTimer::getDurationSinceStart() const { return T::now() - m_startTime; } - + template - inline - float + inline float ChronoTimer::secondsPerCount() const { return (float)RATIO::num / RATIO::den; } - + template - inline - float + inline float ChronoTimer::getTotalDurationSeconds() { assert( !m_started ); @@ -116,7 +110,7 @@ namespace mgOnGpu } // --------------------------------------------------------------------------- - + // RdtscTimer: faster ("new") *EXPERIMENTAL* timers based on rdtsc // The rdtsc() call is derived from the TSCNS class (https://github.com/MengRao/tscns) // The conversion of rdtsc counts to seconds is calibrated on the average frequency during the timer lifetime @@ -143,8 +137,7 @@ namespace mgOnGpu uint64_t m_ctorCount; }; - inline - uint64_t + inline uint64_t RdtscTimer::rdtsc() { #if defined( __x86_64__ ) @@ -154,8 +147,7 @@ namespace mgOnGpu #endif } - inline - RdtscTimer::RdtscTimer() + inline RdtscTimer::RdtscTimer() : m_totalDuration( 0 ) , m_started( false ) , m_startCount( 0 ) @@ -166,8 +158,7 @@ namespace mgOnGpu m_ctorCount = rdtsc(); } - inline - void + inline void RdtscTimer::start() { assert( !m_started ); @@ -175,8 +166,7 @@ namespace mgOnGpu m_startCount = rdtsc(); } - inline - void + inline void RdtscTimer::stop() { assert( m_started ); @@ -184,15 +174,13 @@ namespace mgOnGpu m_totalDuration += getCountsSinceStart(); } - inline - uint64_t + inline uint64_t RdtscTimer::getCountsSinceStart() const { return rdtsc() - m_startCount; } - inline - float + inline float RdtscTimer::secondsPerCount() { m_ctorTimer.stop(); @@ -201,8 +189,7 @@ namespace mgOnGpu return secPerCount; } - inline - float + inline float RdtscTimer::getTotalDurationSeconds() { assert( !m_started ); diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/timermap.h b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/timermap.h index 627707fdba..61222e0ecc 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/timermap.h +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/timermap.h @@ -52,8 +52,10 @@ namespace mgOnGpu // Switch to a new partition if( !m_started ) { - if( m_useChronoTimers ) m_chronoTimer.start(); - else m_rdtscTimer.start(); + if( m_useChronoTimers ) + m_chronoTimer.start(); + else + m_rdtscTimer.start(); m_started = true; } m_active = key; @@ -75,11 +77,15 @@ namespace mgOnGpu uint64_t last = 0; if( m_active != "" ) { - if( m_useChronoTimers ) last = m_chronoTimer.getCountsSinceStart(); - else last = m_rdtscTimer.getCountsSinceStart(); + if( m_useChronoTimers ) + last = m_chronoTimer.getCountsSinceStart(); + else + last = m_rdtscTimer.getCountsSinceStart(); m_partitionTotalCounts[m_active] += last; - if( m_useChronoTimers ) m_chronoTimer.stop(); - else m_rdtscTimer.stop(); + if( m_useChronoTimers ) + m_chronoTimer.stop(); + else + m_rdtscTimer.stop(); m_started = false; } m_active = ""; @@ -92,9 +98,11 @@ namespace mgOnGpu // Return timer calibration (at this point in time for rdtsc, constant in time for chrono) float secondsPerCount() { - if( m_useChronoTimers ) return m_chronoTimer.secondsPerCount(); - else return m_rdtscTimer.secondsPerCount(); - } + if( m_useChronoTimers ) + return m_chronoTimer.secondsPerCount(); + else + return m_rdtscTimer.secondsPerCount(); + } // Dump the overall results void dump( std::ostream& ostr = std::cout, bool json = false ) @@ -189,7 +197,6 @@ namespace mgOnGpu std::map m_partitionIds; bool m_useChronoTimers; bool m_started; // when the timer is stopped, it must be explicitly restarted - }; }