From b890cb79426b25b2a77e99d919acf161ecc3b59b Mon Sep 17 00:00:00 2001 From: Richard Chapman Date: Wed, 6 Sep 2023 13:11:47 +0100 Subject: [PATCH] HPCC-30156 Instrument CriticalSection Moved into jmutex Signed-off-by: Richard Chapman --- system/jlib/jdebug.hpp | 73 +--------------- system/jlib/jmutex.cpp | 22 +++++ system/jlib/jmutex.hpp | 149 +++++++++++++++++++++++++++---- system/jlib/jtiming.hpp | 97 +++++++++++++++++++++ testing/unittests/unittests.cpp | 150 ++------------------------------ 5 files changed, 257 insertions(+), 234 deletions(-) create mode 100644 system/jlib/jtiming.hpp diff --git a/system/jlib/jdebug.hpp b/system/jlib/jdebug.hpp index 6c89b35b0ab..4834c15478c 100644 --- a/system/jlib/jdebug.hpp +++ b/system/jlib/jdebug.hpp @@ -23,78 +23,7 @@ #include "jiface.hpp" #include "jstats.h" #include - -#define TIMING - -__int64 jlib_decl cycle_to_nanosec(cycle_t cycles); -__int64 jlib_decl cycle_to_microsec(cycle_t cycles); -__int64 jlib_decl cycle_to_millisec(cycle_t cycles); -cycle_t jlib_decl nanosec_to_cycle(__int64 cycles); -cycle_t jlib_decl millisec_to_cycle(unsigned ms); -double jlib_decl getCycleToNanoScale(); -void jlib_decl display_time(const char * title, cycle_t diff); - -// X86 / X86_64 -#if defined(_ARCH_X86_64_) || defined(_ARCH_X86_) - -#define HAS_GOOD_CYCLE_COUNTER - -#if defined(_WIN32) && defined (_ARCH_X86_) -#pragma warning(push) -#pragma warning(disable:4035) -inline cycle_t getTSC() { __asm { __asm _emit 0x0f __asm _emit 0x31 } } -#pragma warning(pop) -#elif !defined(_WIN32) -inline cycle_t getTSC() { return __builtin_ia32_rdtsc(); } -#else -#include -inline cycle_t getTSC() { return __rdtsc(); } -#endif // WIN32 - -#elif defined(_ARCH_PPC) - -#define HAS_GOOD_CYCLE_COUNTER - -static inline cycle_t getTSC() -{ - int64_t result; -#ifdef _ARCH_PPC64 - /* - This reads timebase in one 64bit go. Does *not* include a workaround for the cell (see - http://ozlabs.org/pipermail/linuxppc-dev/2006-October/027052.html) - */ - __asm__ volatile( - "mftb %0" - : "=r" (result)); -#else - /* - Read the high 32bits of the timer, then the lower, and repeat if high order has changed in the meantime. See - http://ozlabs.org/pipermail/linuxppc-dev/1999-October/003889.html - */ - unsigned long dummy; - __asm__ volatile( - "mfspr %1,269\n\t" /* mftbu */ - "mfspr %L0,268\n\t" /* mftb */ - "mfspr %0,269\n\t" /* mftbu */ - "cmpw %0,%1\n\t" /* check if the high order word has chanegd */ - "bne $-16" - : "=r" (result), "=r" (dummy)); -#endif - return result; -} - -#else -// ARMFIX: cycle-count is not always available in user mode -// http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.ddi0338g/Bihbeabc.html -// http://neocontra.blogspot.co.uk/2013/05/user-mode-performance-counters-for.html -inline cycle_t getTSC() { return 0; } -#endif // X86 - -#if defined(INLINE_GET_CYCLES_NOW) && defined(HAS_GOOD_CYCLE_COUNTER) -inline cycle_t get_cycles_now() { return getTSC(); } -#else -cycle_t jlib_decl get_cycles_now(); // equivalent to getTSC when available -#endif +#include struct HardwareInfo { diff --git a/system/jlib/jmutex.cpp b/system/jlib/jmutex.cpp index 8d1fe0b369a..670158a5d2f 100644 --- a/system/jlib/jmutex.cpp +++ b/system/jlib/jmutex.cpp @@ -118,6 +118,28 @@ int Mutex::unlockAll() return ret; } +static SpinLock sl; +void InstrumentedCriticalSection::reportContended(bool destructor) +{ + if (contended*2 > uncontended && contended % 1000 == 0) + { + SpinBlock block(sl); + DBGLOG("Heavily contended critsec %u/%u", contended, contended+uncontended); + printStackReport(); + } + else if (destructor && contended > 1000) + { + SpinBlock block(sl); + DBGLOG("Contended critsec %u/%u", contended, contended+uncontended); + } + if (destructor && (waitCycles || holdCycles)) + { + DBGLOG("Total wait time for CritSec: %" I64F "u ms", cycle_to_millisec(waitCycles)); + DBGLOG("Total hold time for CritSec: %" I64F "u ms", cycle_to_millisec(holdCycles)); + } +} + + diff --git a/system/jlib/jmutex.hpp b/system/jlib/jmutex.hpp index 836f26ded1b..abea2381f70 100644 --- a/system/jlib/jmutex.hpp +++ b/system/jlib/jmutex.hpp @@ -25,6 +25,7 @@ #include #include "jiface.hpp" #include "jsem.hpp" +#include "jtiming.hpp" extern jlib_decl void ThreadYield(); extern jlib_decl void spinUntilReady(std::atomic_uint &value); @@ -34,6 +35,9 @@ extern jlib_decl void spinUntilReady(std::atomic_uint &value); //#define SPINLOCK_USE_MUTEX // for testing #define SPINLOCK_RR_CHECK // checks for realtime threads #define _ASSERT_LOCK_SUPPORT +#define USE_INSTRUMENTED_CRITSECS +#elif defined(_PROFILE) +#define USE_INSTRUMENTED_CRITSECS #endif #ifdef SPINLOCK_USE_MUTEX @@ -195,7 +199,7 @@ TryEnterCriticalSection( ); }; -class jlib_decl CriticalSection +class jlib_decl UninstrumentedCriticalSection { // lightweight mutex within a single process private: @@ -255,12 +259,14 @@ class jlib_decl CriticalSection bool wouldBlock() { if (TryEnterCriticalSection(&flags)) { leave(); return false; } return true; } // debug only #endif }; + +typedef UninstrumentedCriticalSection InstrumentedCriticalSection; // Not worth implementing for Windows #else /** * Mutex locking wrapper. Use enter/leave to lock/unlock. */ -class CriticalSection +class UninstrumentedCriticalSection { private: MutexId mutex; @@ -268,9 +274,9 @@ class CriticalSection ThreadId owner; unsigned depth; #endif - CriticalSection (const CriticalSection &); + UninstrumentedCriticalSection (const UninstrumentedCriticalSection &) = delete; public: - inline CriticalSection() + inline UninstrumentedCriticalSection() { pthread_mutexattr_t attr; pthread_mutexattr_init(&attr); @@ -287,7 +293,7 @@ class CriticalSection #endif } - inline ~CriticalSection() + inline ~UninstrumentedCriticalSection() { #ifdef _ASSERT_LOCK_SUPPORT assert(owner==0 && depth==0); @@ -327,6 +333,92 @@ class CriticalSection #endif } }; + +class InstrumentedCriticalSection +{ +private: + MutexId mutex; +#ifdef _ASSERT_LOCK_SUPPORT + ThreadId owner = 0; +#endif + unsigned depth = 0; + cycle_t waitCycles = 0; + cycle_t holdCycles = 0; + cycle_t holdStart = 0; + unsigned contended = 0; + unsigned uncontended = 0; + bool wasContended = false; + InstrumentedCriticalSection (const InstrumentedCriticalSection &) = delete; +public: + inline InstrumentedCriticalSection() + { + pthread_mutexattr_t attr; + pthread_mutexattr_init(&attr); +#ifdef _DEBUG + verifyex(pthread_mutexattr_settype(&attr,PTHREAD_MUTEX_RECURSIVE)==0); // verify supports attr +#else + pthread_mutexattr_settype(&attr,PTHREAD_MUTEX_RECURSIVE); +#endif + pthread_mutex_init(&mutex, &attr); + pthread_mutexattr_destroy(&attr); + } + + inline ~InstrumentedCriticalSection() + { +#ifdef _ASSERT_LOCK_SUPPORT + assert(owner==0); +#endif + assert(depth==0); + pthread_mutex_destroy(&mutex); + reportContended(true); + } + + inline void enter() + { + cycle_t start = get_cycles_now(); +#ifdef _ASSERT_LOCK_SUPPORT + ThreadId me = GetCurrentThreadId(); +#endif + bool isContended = depth != 0; + pthread_mutex_lock(&mutex); + depth++; + holdStart = get_cycles_now(); + waitCycles += holdStart-start; + if (isContended) + contended++; + else + uncontended++; + wasContended = isContended; +#ifdef _ASSERT_LOCK_SUPPORT + owner = me; +#endif + } + + inline void leave() + { + bool isContended = false; + if (depth==1) + { +#ifdef _ASSERT_LOCK_SUPPORT + owner = 0; +#endif + isContended = wasContended; + holdCycles += get_cycles_now()-holdStart; + } + depth--; + pthread_mutex_unlock(&mutex); + if (isContended) + reportContended(false); + } + inline void assertLocked() + { +#ifdef _ASSERT_LOCK_SUPPORT + assertex(owner == GetCurrentThreadId()); +#endif + } + void reportContended(bool destructor); +}; + #endif /** @@ -334,42 +426,49 @@ class CriticalSection * the lock on a critical section (parameter). * Blocks on construction, unblocks on destruction. */ -class CriticalBlock + +template class CriticalBlockOf { - CriticalSection &crit; + T &crit; public: - inline CriticalBlock(CriticalSection &c) : crit(c) { crit.enter(); } - inline ~CriticalBlock() { crit.leave(); } + inline CriticalBlockOf(T &c) : crit(c) { crit.enter(); } + inline ~CriticalBlockOf() { crit.leave(); } }; +typedef CriticalBlockOf UninstrumentedCriticalBlock; +typedef CriticalBlockOf InstrumentedCriticalBlock; + /** * Critical section delimiter, using scope to define lifetime of * the lock on a critical section (parameter). * Unblocks on construction, blocks on destruction. */ -class CriticalUnblock +template class CriticalUnblockOf { - CriticalSection &crit; + T &crit; public: - inline CriticalUnblock(CriticalSection &c) : crit(c) { crit.leave(); } - inline ~CriticalUnblock() { crit.enter(); } + inline CriticalUnblockOf(T &c) : crit(c) { crit.leave(); } + inline ~CriticalUnblockOf() { crit.enter(); } }; -class CLeavableCriticalBlock +typedef CriticalUnblockOf UninstrumentedCriticalUnblock; +typedef CriticalUnblockOf InstrumentedCriticalUnblock; + +template class CLeavableCriticalBlockOf { - CriticalSection &crit; + T &crit; bool locked = false; public: - inline CLeavableCriticalBlock(CriticalSection &_crit) : crit(_crit) + inline CLeavableCriticalBlockOf(T &_crit) : crit(_crit) { enter(); } - inline CLeavableCriticalBlock(CriticalSection &_crit, bool lock) : crit(_crit) + inline CLeavableCriticalBlockOf(T &_crit, bool lock) : crit(_crit) { if (lock) enter(); } - inline ~CLeavableCriticalBlock() + inline ~CLeavableCriticalBlockOf() { if (locked) crit.leave(); @@ -391,6 +490,20 @@ class CLeavableCriticalBlock } }; +typedef CLeavableCriticalBlockOf UninstrumentedLeavableCriticalBlock; +typedef CLeavableCriticalBlockOf InstrumentedLeavableCriticalBlock; + +#ifdef USE_INSTRUMENTED_CRITSECS +typedef InstrumentedCriticalSection CriticalSection; +typedef InstrumentedCriticalBlock CriticalBlock; +typedef InstrumentedCriticalBlock CriticalUnblock; +typedef InstrumentedLeavableCriticalBlock CLeavableCriticalBlock; +#else +typedef UninstrumentedCriticalSection CriticalSection; +typedef UninstrumentedCriticalBlock CriticalBlock; +typedef UninstrumentedCriticalBlock CriticalUnblock; +typedef UninstrumentedLeavableCriticalBlock CLeavableCriticalBlock; +#endif #ifdef SPINLOCK_USE_MUTEX // for testing diff --git a/system/jlib/jtiming.hpp b/system/jlib/jtiming.hpp new file mode 100644 index 00000000000..1c2216480de --- /dev/null +++ b/system/jlib/jtiming.hpp @@ -0,0 +1,97 @@ +/*############################################################################## + + HPCC SYSTEMS software Copyright (C) 2023 HPCC Systems®. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +############################################################################## */ + + + +#ifndef JTIMING_HPP +#define JTIMING_HPP + +#include "jiface.hpp" + +#define TIMING + +__int64 jlib_decl cycle_to_nanosec(cycle_t cycles); +__int64 jlib_decl cycle_to_microsec(cycle_t cycles); +__int64 jlib_decl cycle_to_millisec(cycle_t cycles); +cycle_t jlib_decl nanosec_to_cycle(__int64 cycles); +cycle_t jlib_decl millisec_to_cycle(unsigned ms); +double jlib_decl getCycleToNanoScale(); +void jlib_decl display_time(const char * title, cycle_t diff); + +// X86 / X86_64 +#if defined(_ARCH_X86_64_) || defined(_ARCH_X86_) + +#define HAS_GOOD_CYCLE_COUNTER + +#if defined(_WIN32) && defined (_ARCH_X86_) +#pragma warning(push) +#pragma warning(disable:4035) +inline cycle_t getTSC() { __asm { __asm _emit 0x0f __asm _emit 0x31 } } +#pragma warning(pop) +#elif !defined(_WIN32) +inline cycle_t getTSC() { return __builtin_ia32_rdtsc(); } +#else +#include +inline cycle_t getTSC() { return __rdtsc(); } +#endif // WIN32 + +#elif defined(_ARCH_PPC) + +#define HAS_GOOD_CYCLE_COUNTER + +static inline cycle_t getTSC() +{ + int64_t result; +#ifdef _ARCH_PPC64 + /* + This reads timebase in one 64bit go. Does *not* include a workaround for the cell (see + http://ozlabs.org/pipermail/linuxppc-dev/2006-October/027052.html) + */ + __asm__ volatile( + "mftb %0" + : "=r" (result)); +#else + /* + Read the high 32bits of the timer, then the lower, and repeat if high order has changed in the meantime. See + http://ozlabs.org/pipermail/linuxppc-dev/1999-October/003889.html + */ + unsigned long dummy; + __asm__ volatile( + "mfspr %1,269\n\t" /* mftbu */ + "mfspr %L0,268\n\t" /* mftb */ + "mfspr %0,269\n\t" /* mftbu */ + "cmpw %0,%1\n\t" /* check if the high order word has chanegd */ + "bne $-16" + : "=r" (result), "=r" (dummy)); +#endif + return result; +} + +#else +// ARMFIX: cycle-count is not always available in user mode +// http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.ddi0338g/Bihbeabc.html +// http://neocontra.blogspot.co.uk/2013/05/user-mode-performance-counters-for.html +inline cycle_t getTSC() { return 0; } +#endif // X86 + +#if defined(INLINE_GET_CYCLES_NOW) && defined(HAS_GOOD_CYCLE_COUNTER) +inline cycle_t get_cycles_now() { return getTSC(); } +#else +cycle_t jlib_decl get_cycles_now(); // equivalent to getTSC when available +#endif + +#endif \ No newline at end of file diff --git a/testing/unittests/unittests.cpp b/testing/unittests/unittests.cpp index a33632d9169..f39c4bf4c5b 100644 --- a/testing/unittests/unittests.cpp +++ b/testing/unittests/unittests.cpp @@ -928,144 +928,6 @@ CPPUNIT_TEST_SUITE_REGISTRATION( PipeRunTest ); CPPUNIT_TEST_SUITE_NAMED_REGISTRATION( PipeRunTest, "PipeRunTest" ); #endif -#ifndef _WIN32 -//#define TIME_CRITSECS -#define CHECK_CS_CONTENTION - -class InstrumentedCriticalSection -{ -private: - MutexId mutex; -#ifdef _ASSERT_LOCK_SUPPORT - ThreadId owner = 0; -#endif - unsigned depth = 0; -#ifdef TIME_CRITSECS - cycle_t waitCycles = 0; - cycle_t holdCycles = 0; - cycle_t holdStart = 0; -#endif -#ifdef CHECK_CS_CONTENTION - unsigned contended = 0; - unsigned uncontended = 0; - bool wasContended = false; -#endif - InstrumentedCriticalSection (const CriticalSection &); -public: - inline InstrumentedCriticalSection() - { - pthread_mutexattr_t attr; - pthread_mutexattr_init(&attr); -#ifdef _DEBUG - verifyex(pthread_mutexattr_settype(&attr,PTHREAD_MUTEX_RECURSIVE)==0); // verify supports attr -#else - pthread_mutexattr_settype(&attr,PTHREAD_MUTEX_RECURSIVE); -#endif - pthread_mutex_init(&mutex, &attr); - pthread_mutexattr_destroy(&attr); - } - - inline ~InstrumentedCriticalSection() - { -#ifdef _ASSERT_LOCK_SUPPORT - assert(owner==0); -#endif - assert(depth==0); - pthread_mutex_destroy(&mutex); -#ifdef TIME_CRITSECS - if (waitCycles || holdCycles) - { - DBGLOG("Total wait time for CritSec: %" I64F "u ms", cycle_to_millisec(waitCycles)); - DBGLOG("Total hold time for CritSec: %" I64F "u ms", cycle_to_millisec(holdCycles)); - } -#endif -#ifdef CHECK_CS_CONTENTION - reportContended(true); -#endif - } - - inline void enter() - { - cycle_t start = get_cycles_now(); -#ifdef CHECK_CS_CONTENTION - bool isContended = depth != 0; -#endif -#ifdef _ASSERT_LOCK_SUPPORT - ThreadId me = GetCurrentThreadId(); -#endif - pthread_mutex_lock(&mutex); -#ifdef TIME_CRITSECS - holdStart = get_cycles_now(); - waitCycles += holdStart-start; -#endif - depth++; -#ifdef CHECK_CS_CONTENTION - if (isContended) - contended++; - else - uncontended++; - wasContended = isContended; -#endif -#ifdef _ASSERT_LOCK_SUPPORT - owner = me; -#endif - } - - inline void leave() - { - bool isContended = false; - depth--; - if (!depth) - { -#ifdef _ASSERT_LOCK_SUPPORT - owner = 0; -#endif -#ifdef CHECK_CS_CONTENTION - isContended = wasContended; -#endif -#ifdef TIME_CRITSECS - holdCycles += get_cycles_now()-holdStart; -#endif - } - pthread_mutex_unlock(&mutex); - if (isContended) - reportContended(false); - } - inline void assertLocked() - { -#ifdef _ASSERT_LOCK_SUPPORT - assertex(owner == GetCurrentThreadId()); -#endif - } -#ifdef CHECK_CS_CONTENTION - void reportContended(bool destructor); -#endif -}; - -static SpinLock sl; -void InstrumentedCriticalSection::reportContended(bool destructor) -{ - if (contended*2 > uncontended && contended % 1000 == 0) - { - SpinBlock block(sl); - DBGLOG("Heavily contended critsec %u/%u", contended, contended+uncontended); - printStackReport(); - } - else if (destructor && contended > 1000) - { - SpinBlock block(sl); - DBGLOG("Contended critsec %u/%u", contended, contended+uncontended); - } -} - -class InstrumentedCriticalBlock -{ - InstrumentedCriticalSection &crit; -public: - inline InstrumentedCriticalBlock(InstrumentedCriticalSection &c) : crit(c) { crit.enter(); } - inline ~InstrumentedCriticalBlock() { crit.leave(); } -}; -#endif class RelaxedAtomicTimingTest : public CppUnit::TestFixture { @@ -1077,7 +939,7 @@ class RelaxedAtomicTimingTest : public CppUnit::TestFixture { // testRunner(0); // testRunner(1); -// testRunner(2); + testRunner(2); testRunner(3); // testRunner(4); // testRunner(5); @@ -1088,7 +950,7 @@ class RelaxedAtomicTimingTest : public CppUnit::TestFixture CCycleTimer timer; unsigned count = 100000000; RelaxedAtomic ra[201]; - CriticalSection lock[201]; + UninstrumentedCriticalSection lock[201]; InstrumentedCriticalSection ilock[201]; for (int a = 0; a < 201; a++) @@ -1097,7 +959,7 @@ class RelaxedAtomicTimingTest : public CppUnit::TestFixture class T : public CThreaded { public: - T(unsigned _count, RelaxedAtomic &_ra, CriticalSection &_lock, InstrumentedCriticalSection &_ilock, unsigned _mode) + T(unsigned _count, RelaxedAtomic &_ra, UninstrumentedCriticalSection &_lock, InstrumentedCriticalSection &_ilock, unsigned _mode) : CThreaded(""), count(_count), ra(_ra), lock(_lock), ilock(_ilock), mode(_mode) {} virtual int run() override @@ -1130,7 +992,7 @@ class RelaxedAtomicTimingTest : public CppUnit::TestFixture int &a = (int &) ra; while (count--) { - CriticalBlock b(lock); + UninstrumentedCriticalBlock b(lock); a++; } } @@ -1148,7 +1010,7 @@ class RelaxedAtomicTimingTest : public CppUnit::TestFixture RelaxedAtomic &a = ra; while (count--) { - CriticalBlock b(lock); + UninstrumentedCriticalBlock b(lock); a.fastAdd(1); } } @@ -1166,7 +1028,7 @@ class RelaxedAtomicTimingTest : public CppUnit::TestFixture unsigned mode; unsigned count; RelaxedAtomic &ra; - CriticalSection &lock; + UninstrumentedCriticalSection &lock; InstrumentedCriticalSection &ilock; } t1a(count, ra[0], lock[0], ilock[0], mode), t2a(count, ra[0], lock[0], ilock[0], mode), t3a(count, ra[0], lock[0], ilock[0], mode), t1b(count, ra[0], lock[1], ilock[1], mode), t2b(count, ra[1], lock[2], ilock[2], mode), t3b(count, ra[2], lock[3], ilock[3], mode),