diff --git a/CMakeLists.txt b/CMakeLists.txt index a926652d1..2eebcc6f1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -228,6 +228,9 @@ option(SIMH_PACKAGE_SUFFIX option(MAC_UNIVERSAL "macOS universal binary flag: TRUE -> build universal binaries, FALSE -> don't." ${MAC_UNIVERSAL_OPTVAL}) +option(DONT_USE_AIO_INTRINSICS + "Don't use compiler/platform intrinsics for AIO, revert to lock-based AIO" + FALSE) # Places where CMake should look for dependent package configuration fragments and artifacts: set(SIMH_PREFIX_PATH_LIST) diff --git a/README-CMake.md b/README-CMake.md index 4e1780851..fdd83ef99 100644 --- a/README-CMake.md +++ b/README-CMake.md @@ -511,25 +511,22 @@ or video support. # List the supported command line flags: $ cmake/cmake-builder.sh --help - Configure and build simh simulators on Linux and *nix-like platforms. + ** cmake version 3.18.4 - Subdirectories: - cmake/build-unix: Makefile-based build simulators - cmake/build-ninja: Ninja build-based simulators + CMake suite maintained and supported by Kitware (kitware.com/cmake). + Configure and build simh simulators on Linux and *nix-like platforms. - Options: - -------- + Compile/Build options: + ---------------------- --clean (-x) Remove the build subdirectory --generate (-g) Generate the build environment, don't compile/build --parallel (-p) Enable build parallelism (parallel builds) - --nonetwork Build simulators without network support - --novideo Build simulators without video support --notest Do not execute 'ctest' test cases --noinstall Do not install SIMH simulators. --testonly Do not build, execute the 'ctest' test cases --installonly Do not build, install the SIMH simulators - --flavor (-f) Specifies the build flavor. Valid flavors are: + --flavor (-f) [Required] Specifies the build flavor. Valid flavors are: unix ninja xcode @@ -541,8 +538,7 @@ or video support. --config (-c) Specifies the build configuration: 'Release' or 'Debug' --target Build a specific simulator or simulators. 
Separate multiple - targets by separating with a comma, - e.g. "--target pdp8,pdp11,vax750,altairz80,3b2" + targets with a comma, e.g. "--target pdp8,pdp11,vax750,altairz80,3b2" --lto Enable Link Time Optimization (LTO) in Release builds --debugWall Enable maximal warnings in Debug builds --cppcheck Enable cppcheck static code analysis rules @@ -553,6 +549,17 @@ or video support. --verbose Turn on verbose build output + SIMH feature control options: + ----------------------------- + --nonetwork Build simulators without network support + --novideo Build simulators without video support + --no-aio-intrinsics + Do not use compiler/platform intrinsics to implement AIO + functions (aka "lock-free" AIO), reverts to lock-based AIO + if threading libraries are detected. + + Other options: + -------------- --help (-h) Print this help. ``` @@ -569,7 +576,7 @@ or video support. PS C:\...\open-simh> Get-Help -deatailed cmake\cmake-builder.ps1 NAME - C:\Users\bsm21317\play\open-simh\cmake\cmake-builder.ps1 + C:\...\play\open-simh\cmake\cmake-builder.ps1 SYNOPSIS Configure and build SIMH's dependencies and simulators using the Microsoft Visual @@ -577,9 +584,9 @@ or video support. SYNTAX - C:\Users\bsm21317\play\open-simh\cmake\cmake-builder.ps1 [[-flavor] ] [[-config] ] [[-cpack_suffix] ] [[-target] ] - [-clean] [-help] [-nonetwork] [-novideo] [-notest] [-noinstall] [-parallel] [-generate] [-regenerate] [-testonly] [-installOnly] [-windeprecation] - [-package] [-lto] [-debugWall] [-cppcheck] [] + C:\...\play\open-simh\cmake\cmake-builder.ps1 [[-flavor] ] [[-config] ] [[-cpack_suffix] ] [[-target] + ] [-clean] [-help] [-nonetwork] [-novideo] [-noaiointrinsics] [-notest] [-noinstall] [-parallel] [-generate] [-testonly] + [-installOnly] [-windeprecation] [-lto] [-debugWall] [-cppcheck] [] DESCRIPTION @@ -588,9 +595,9 @@ or video support. 1. Configure and generate the build environment selected by '-flavor' option. 2. 
Build missing runtime dependencies and the simulator suite with the compiler - configuration selected by the '-config' option. The "Release" configuration - generates optimized executables; the "Debug" configuration generates - development executables with debugger information. + configuration selected by the '-config' option. The "Release" configuration + generates optimized executables; the "Debug" configuration generates + development executables with debugger information. 3. Test the simulators There is an install phase that can be invoked separately as part of the SIMH @@ -624,6 +631,9 @@ or video support. mingw-make MinGW GCC/mingw32-make mingw-ninja MinGW GCC/ninja + -config + The target build configuration. Valid values: "Release" and "Debug" + [...truncated for brevity...] ``` diff --git a/cmake/cmake-builder.ps1 b/cmake/cmake-builder.ps1 index f39dc77f0..de661c9fb 100644 --- a/cmake/cmake-builder.ps1 +++ b/cmake/cmake-builder.ps1 @@ -115,6 +115,11 @@ param ( [Parameter(Mandatory=$false)] [switch] $novideo = $false, + ## Compile the SIMH simulator without AIO intrinsics ("lock-free" AIO), + ## using lock-based AIO via thread mutexes instead. + [Parameter(Mandatory=$false)] + [switch] $noaiointrinsics = $false, + ## Disable the build's tests. 
[Parameter(Mandatory=$false)] [switch] $notest = $false, @@ -411,6 +416,10 @@ if (($scriptPhases -contains "generate") -or ($scriptPhases -contains "build")) { $generateArgs += @("-DWITH_VIDEO:Bool=Off") } + if ($noaiointrinsics) + { + $generateArgs += @("-DDONT_USE_AIO_INTRINSICS:Bool=On") + } if ($lto) { $generateArgs += @("-DRELEASE_LTO:Bool=On") diff --git a/cmake/cmake-builder.sh b/cmake/cmake-builder.sh index 0ea8d6eef..a6b66767c 100755 --- a/cmake/cmake-builder.sh +++ b/cmake/cmake-builder.sh @@ -7,17 +7,11 @@ showHelp() cat <a_event_time, sim_vm_interval_units); ++migrated; @@ -406,13 +406,16 @@ if (AIO_QUEUE_VAL != QUEUE_LIST_END) { /* List !Empty */ } else a_event_time = uptr->a_event_time; - AIO_IUNLOCK; + /* Note: Commented out and not deleted. So far, SIMH doesn't appear to + * attempt to reacquire the sim_asynch_lock mutex across threads. + * Reacquiring the mutex would potentially cause a deadlock across threads. */ + /*AIO_IUNLOCK;*/ uptr->a_activate_call (uptr, a_event_time); if (uptr->a_check_completion) { sim_debug (SIM_DBG_AIO_QUEUE, &sim_scp_dev, "Calling Completion Check for asynch event on %s\n", sim_uname(uptr)); uptr->a_check_completion (uptr); } - AIO_ILOCK; + /*AIO_ILOCK;*/ } } AIO_IUNLOCK; @@ -431,12 +434,11 @@ else { uptr->a_event_time = event_time; uptr->a_activate_call = caller; do { - q = AIO_QUEUE_VAL; + q = AIO_QUEUE_VAL(); uptr->a_next = q; /* Mark as on list */ - } while (q != AIO_QUEUE_SET(uptr, q)); + } while (!AIO_QUEUE_SET(uptr, q)); } AIO_IUNLOCK; -sim_asynch_check = 0; /* try to force check */ if (sim_idle_wait) { sim_debug (TIMER_DBG_IDLE, &sim_timer_dev, "waking due to event on %s after %d %s\n", sim_uname(uptr), event_time, sim_vm_interval_units); pthread_cond_signal (&sim_asynch_wake); diff --git a/sim_defs.h b/sim_defs.h index 1f8162560..5c4c14b2f 100644 --- a/sim_defs.h +++ b/sim_defs.h @@ -1244,18 +1244,45 @@ extern int32 sim_asynch_inst_latency; #define USE_AIO_INTRINSICS 1 #endif #endif -#if defined(_WIN32) || 
defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_4) || defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8) + +#if defined(_WIN32) || \ + defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_4) || \ + defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8) || \ + (defined(__ATOMIC_ACQ_REL) && defined(__ATOMIC_SEQ_CST) && defined(__ATOMIC_ACQUIRE)) #define USE_AIO_INTRINSICS 1 #endif + /* Provide a way to test both Intrinsic and Lock based queue manipulations */ /* when both are available on a particular platform */ #if defined(DONT_USE_AIO_INTRINSICS) && defined(USE_AIO_INTRINSICS) #undef USE_AIO_INTRINSICS #endif + +/* AIO notes: + + - AIO_LOCK, AIO_UNLOCK: Overall asynchronous I/O mutex managed via + sim_asynch_lock. + + This mutex also serializes output in _sim_debug_write_flush across + threads. + + - AIO_ILOCK, AIO_IUNLOCK: I/O operation lock and unlock. These macros are + only used in the AIO service routines: sim_aio_update_queue and + sim_aio_activate. + + Lock-free (USE_AIO_INTRINSICS) code paths: These macros are empty for + the lock-free code because lock-free code doesn't need to grab a mutex + (otherwise, it wouldn't be lock free.) + + Lock-based code paths: sim_asynch_lock is reinitialized as a recursive + mutex so that AIO_ILOCK/AIO_IUNLOCK can nest within AIO_LOCK/AIO_UNLOCK. + */ + #ifdef USE_AIO_INTRINSICS -/* This approach uses intrinsics to manage access to the link list head */ -/* sim_asynch_queue. This implementation is a completely lock free design */ -/* which avoids the potential ABA issues. */ +/* This approach uses intrinsics to manage access to the linked list head + * sim_asynch_queue. This implementation makes use of processor memory fences + * (hence the "lock free" name) to achieve consistency across cores and + * threads. 
*/ #define AIO_QUEUE_MODE "Lock free asynchronous event queue" #define AIO_INIT \ do { \ @@ -1274,18 +1301,10 @@ extern int32 sim_asynch_inst_latency; pthread_mutex_destroy(&sim_tmxr_poll_lock); \ pthread_cond_destroy(&sim_tmxr_poll_cond); \ } while (0) -#ifdef _WIN32 -#elif defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_4) || defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8) -#define InterlockedCompareExchangePointer(Destination, Exchange, Comparand) __sync_val_compare_and_swap(Destination, Comparand, Exchange) -#elif defined(__DECC_VER) -#define InterlockedCompareExchangePointer(Destination, Exchange, Comparand) (void *)((int32)_InterlockedCompareExchange64(Destination, Exchange, Comparand)) -#else -#error "Implementation of function InterlockedCompareExchangePointer() is needed to build with USE_AIO_INTRINSICS" -#endif -#define AIO_ILOCK AIO_LOCK -#define AIO_IUNLOCK AIO_UNLOCK -#define AIO_QUEUE_VAL (UNIT *)(InterlockedCompareExchangePointer((void * volatile *)&sim_asynch_queue, (void *)sim_asynch_queue, NULL)) -#define AIO_QUEUE_SET(newval, oldval) (UNIT *)(InterlockedCompareExchangePointer((void * volatile *)&sim_asynch_queue, (void *)newval, oldval)) + +#define AIO_ILOCK +#define AIO_IUNLOCK + #define AIO_UPDATE_QUEUE sim_aio_update_queue () #define AIO_ACTIVATE(caller, uptr, event_time) \ if (!pthread_equal ( pthread_self(), sim_asynch_main_threadid )) { \ @@ -1320,16 +1339,16 @@ extern int32 sim_asynch_inst_latency; pthread_mutex_destroy(&sim_tmxr_poll_lock); \ pthread_cond_destroy(&sim_tmxr_poll_cond); \ } while (0) + #define AIO_ILOCK AIO_LOCK #define AIO_IUNLOCK AIO_UNLOCK -#define AIO_QUEUE_VAL sim_asynch_queue -#define AIO_QUEUE_SET(newval, oldval) ((sim_asynch_queue = newval),oldval) + #define AIO_UPDATE_QUEUE sim_aio_update_queue () #define AIO_ACTIVATE(caller, uptr, event_time) \ if (!pthread_equal ( pthread_self(), sim_asynch_main_threadid )) { \ sim_debug (SIM_DBG_AIO_QUEUE, sim_dflt_dev, "Queueing Asynch event for %s after %d instructions\n", 
sim_uname(uptr), event_time);\ AIO_LOCK; \ - if (uptr->a_next) { /* already queued? */ \ + if (NULL != uptr->a_next) { /* already queued? */ \ uptr->a_activate_call = sim_activate_abs; \ } else { \ uptr->a_next = sim_asynch_queue; \ @@ -1350,23 +1369,155 @@ extern int32 sim_asynch_inst_latency; return SCPE_OK; \ } else (void)0 #endif /* USE_AIO_INTRINSICS */ + #define AIO_VALIDATE(uptr) \ if (!pthread_equal ( pthread_self(), sim_asynch_main_threadid )) { \ sim_printf("Improper thread context for operation on %s in %s line %d\n", \ sim_uname(uptr), __FILE__, __LINE__); \ abort(); \ } else (void)0 -#define AIO_CHECK_EVENT \ - if (0 > --sim_asynch_check) { \ - AIO_UPDATE_QUEUE; \ - sim_asynch_check = sim_asynch_inst_latency; \ - } else (void)0 + +/* NOTE: Lock-based AIO will succeed because sim_asynch_lock is a recursive + * mutex when AIO_ILOCK/AIO_IUNLOCK grabs it. Lock-free AIO_ILOCK/AIO_IUNLOCK + * doesn't grab the mutex, so isn't impacted. No ';' after while (0): the caller supplies it. */ +#define AIO_CHECK_EVENT \ + do { \ + AIO_ILOCK; \ + if (0 > --sim_asynch_check || AIO_QUEUE_VAL() != QUEUE_LIST_END) { \ + AIO_UPDATE_QUEUE; \ + sim_asynch_check = sim_asynch_inst_latency; \ + } \ + AIO_IUNLOCK; \ + } while (0) + #define AIO_SET_INTERRUPT_LATENCY(instpersec) \ do { \ sim_asynch_inst_latency = (int32)((((double)(instpersec))*sim_asynch_latency)/1000000000);\ if (sim_asynch_inst_latency == 0) \ sim_asynch_inst_latency = 1; \ } while (0) + +/*=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~*/ + +/* Inline code hair: */ + +/*=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~*/ + +/* AIO_QUEUE_VAL: Return the AIO queue's head. This function takes advantage + of read fences when available instead of enforcing a full memory fence + (i.e., all reads and writes to sim_asynch_queue must complete.) + + As a static inline, this function is likely to be optimized out into the + underlying intrinsic for SIMH Release versions. 
Previously, AIO_QUEUE_VAL + was a preprocessor macro. + */ +static SIM_INLINE UNIT *AIO_QUEUE_VAL() +{ +#if USE_AIO_INTRINSICS + +# if defined(_WIN32) + /* Win32 doesn't have an atomic load, use compare-exchange that never + * succeeds to obtain sim_asynch_queue. The Acquire variant imposes a read + * fence, when available (not x86/x86_64, definitely ARM64.) */ + +# if defined(InterlockedCompareExchangePointerAcquire) + return ((UNIT *) InterlockedCompareExchangePointerAcquire((PVOID volatile *) &sim_asynch_queue, sim_asynch_queue, NULL)); +# else + return ((UNIT *) InterlockedCompareExchangePointer((PVOID volatile *) &sim_asynch_queue, sim_asynch_queue, NULL)); +# endif + +# elif defined(__ATOMIC_ACQ_REL) && defined(__ATOMIC_SEQ_CST) && defined(__ATOMIC_ACQUIRE) + /* Newer GCC and Clang synchronization primitives. SIMH uses a sequential + * consistency fence to flush pending writes. Might be able to skate by + * with simple acquire fence; standards strongly suggest that acquire is + * always paired with release. */ + + UNIT *retval; + +# if defined(__GNUC__) + __atomic_load((void * volatile *) &sim_asynch_queue, (void **) &retval, __ATOMIC_SEQ_CST); +# elif defined(__clang__) + __atomic_load(sizeof(sim_asynch_queue), &sim_asynch_queue, (void **) &retval, __ATOMIC_SEQ_CST); +# else +# error "USE_AIO_INTRINSICS: no atomic load implementation for this compiler" +# endif + + return retval; +# elif defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_4) || defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8) + /* Older GCC synchronization primitives. They still exist, but are + * implemented as aliases for "__atomic" intrinsics. Note that there isn't a + * __sync load primitive, so SIMH is left with using a compare and swap + * that never succeeds, returning sim_asynch_queue's value. 
*/ + + return (UNIT *) __sync_val_compare_and_swap((void * volatile *) &sim_asynch_queue, NULL, (void *) sim_asynch_queue); +# elif defined(__DECC_VER) + return ((UNIT *) _InterlockedCompareExchange64(&sim_asynch_queue, sim_asynch_queue, NULL)); +# else +#error "Implementation of function InterlockedCompareExchangePointer() is needed to build with USE_AIO_INTRINSICS" +# endif + +#else + /* Not USE_AIO_INTRINSICS. Note there's no read fence here, which could + * be a potential problem on multicore platforms. */ + return sim_asynch_queue; +#endif +} + +/*=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~*/ + +/* AIO_QUEUE_SET: Set the sim_asynch_queue head queue pointer to new_queue_head. + + Returns 1 (true) when successful, 0 (false) if unsuccessful. The underlying + implementation for USE_AIO_INTRINSICS is an atomic compare-exchange; AIO_QUEUE_SET + will fail if another thread modified sim_asynch_queue, causing the compare-exchange + to fail (and the caller should retry.) USE_AIO_INTRINSICS enforces sequential + consistency (i.e., a full read/write memory fence.) + + Success: sim_asynch_queue == current_head (hasn't changed) before exchange + Failure: sim_asynch_queue != current_head (was changed by another core/thread) + before exchange. + + As a static inline, this function is likely to be optimized out into the + underlying intrinsic for SIMH Release versions. Previously, AIO_QUEUE_SET + was a preprocessor macro. + */ +static SIM_INLINE int AIO_QUEUE_SET(UNIT *new_queue_head, UNIT *current_head) +{ +#if USE_AIO_INTRINSICS + +# if defined(_WIN32) + return (InterlockedCompareExchangePointer((PVOID volatile *) &sim_asynch_queue, new_queue_head, current_head) == current_head); +# elif defined(__ATOMIC_ACQ_REL) && defined(__ATOMIC_SEQ_CST) && defined(__ATOMIC_ACQUIRE) + /* Newer GCC and Clang synchronization primitives: __atomic-s. These provide more control + over read and write fences. 
*/ + +# if defined(__GNUC__) + return __atomic_compare_exchange(&sim_asynch_queue, &current_head, &new_queue_head, 0, + __ATOMIC_ACQ_REL, __ATOMIC_ACQUIRE); +# elif defined(__clang__) + /* Naturally, Clang/LLVM has a different __atomic_compare_exchange signature. */ + return __atomic_compare_exchange(sizeof(sim_asynch_queue), &sim_asynch_queue, &current_head, &new_queue_head, + __ATOMIC_ACQ_REL, __ATOMIC_ACQUIRE); +# endif +# elif defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_4) || defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8) + /* Older GCC (and Clang/LLVM) synchronization primitives. They still exist, but are implemented + in terms of aliases for __atomic intrinsics. Use __atomic-s when available. */ + + return __sync_bool_compare_and_swap(&sim_asynch_queue, current_head, new_queue_head); +# elif defined(__DECC_VER) + return (_InterlockedCompareExchange64(&sim_asynch_queue, new_queue_head, current_head) == current_head); +# else +# error "USE_AIO_INTRINSICS is not implemented for this platform??" +# endif + +#else + /* Not USE_AIO_INTRINSICS. No write fence, which could be a potential + * problem on multicore platforms. */ + sim_asynch_queue = new_queue_head; + return 1; +#endif +} + #else /* !SIM_ASYNCH_IO */ #define AIO_QUEUE_MODE "Asynchronous I/O is not available" #define AIO_UPDATE_QUEUE