From 95935607c90a552563095894ce61ed01d779edcc Mon Sep 17 00:00:00 2001 From: "B. Scott Michel" Date: Tue, 19 Dec 2023 11:06:52 -0800 Subject: [PATCH] SCP: Fix sim_asynch_check cross-thread interference Address a Clang/LLVM sanitizer warning that sim_asynch_check is written by both the AIO and main threads, one thread potentially clobbering the other's value. The "sim_asynch_check = 0" at scp.c:239 is where the thread sanitizer detects the issue. This check is supposed to cause the main thread to process I/O updates on the next AIO_CHECK_EVENT code's execution. To preserve that behavior, AIO_CHECK_EVENT now executes AIO_UPDATE_QUEUE when either sim_asynch_check decrements below 0 or there is work to be done on the AIO queue (sim_asynch_queue != QUEUE_LIST_END.) Code refactoring: - Eliminate the asymmetry between the lock-based (mutex) and lock-free implementations. - Lock-free: AIO_ILOCK/AIO_IUNLOCK do not reacquire sim_asynch_lock when compiler intrinsics are present (GCC, Clang, MS C and DEC C on Itanium.) - Lock-based: If DONT_USE_AIO_INTRINSICS is defined, the AIO implementation becomes lock-based via mutexes and AIO_ILOCK/AIO_IUNLOCK recursively acquire/release sim_asynch_lock. - AIO defaults to the lock-based implementation if compiler intrinsics are not available. - GCC, Clang: Prefer the __atomic intrinsics over the deprecated __sync intrinsics. The __sync intrinsics still exist for older GCC compilers. - sim_asynch_lock is a recursive mutex for both lock-based and lock-free implementations. Eliminates implementation asymmetry. - AIO_CHECK_EVENT invokes AIO_ILOCK and AIO_IUNLOCK so that the lock-based code cannot alter sim_asynch_queue when checking for pending I/O work. - sim_debug_io_lock: Debug output serialization lock. Previously, sim_asynch_lock was semantically overloaded to serialize output from _sim_debug_write_flush. This lock provides better semantic clarity. 
- New builder script flag to disable AIO lock-free, force AIO lock-based code: - cmake-builder.ps1 -noaiointrinsics ... - cmake-builder.sh --no-aio-intrinsics ... --- CMakeLists.txt | 3 + README-CMake.md | 46 ++++--- cmake/cmake-builder.sh | 3 +- cmake/pthreads-dep.cmake | 6 +- scp.c | 75 +++++++---- sim_defs.h | 274 +++++++++++++++++++++++++-------------- 6 files changed, 265 insertions(+), 142 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index a715f9a6c..2b74fbef6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -229,6 +229,9 @@ option(SIMH_PACKAGE_SUFFIX option(MAC_UNIVERSAL "macOS universal binary flag: TRUE -> build universal binaries, FALSE -> don't." ${MAC_UNIVERSAL_OPTVAL}) +option(DONT_USE_AIO_INTRINSICS + "Don't use compiler/platform intrinsics for AIO, revert to lock-based AIO" + FALSE) # Places where CMake should look for dependent package configuration fragments and artifacts: set(SIMH_PREFIX_PATH_LIST) diff --git a/README-CMake.md b/README-CMake.md index 406a3db51..2c8e76a86 100644 --- a/README-CMake.md +++ b/README-CMake.md @@ -511,25 +511,22 @@ or video support. # List the supported command line flags: $ cmake/cmake-builder.sh --help - Configure and build simh simulators on Linux and *nix-like platforms. + ** cmake version 3.18.4 - Subdirectories: - cmake/build-unix: Makefile-based build simulators - cmake/build-ninja: Ninja build-based simulators + CMake suite maintained and supported by Kitware (kitware.com/cmake). + Configure and build simh simulators on Linux and *nix-like platforms. - Options: - -------- + Compile/Build options: + ---------------------- --clean (-x) Remove the build subdirectory --generate (-g) Generate the build environment, don't compile/build --parallel (-p) Enable build parallelism (parallel builds) - --nonetwork Build simulators without network support - --novideo Build simulators without video support --notest Do not execute 'ctest' test cases --noinstall Do not install SIMH simulators. 
--testonly Do not build, execute the 'ctest' test cases --installonly Do not build, install the SIMH simulators - --flavor (-f) Specifies the build flavor. Valid flavors are: + --flavor (-f) [Required] Specifies the build flavor. Valid flavors are: unix ninja xcode @@ -541,8 +538,7 @@ or video support. --config (-c) Specifies the build configuration: 'Release' or 'Debug' --target Build a specific simulator or simulators. Separate multiple - targets by separating with a comma, - e.g. "--target pdp8,pdp11,vax750,altairz80,3b2" + targets with a comma, e.g. "--target pdp8,pdp11,vax750,altairz80,3b2" --lto Enable Link Time Optimization (LTO) in Release builds --debugWall Enable maximal warnings in Debug builds --cppcheck Enable cppcheck static code analysis rules @@ -553,6 +549,17 @@ or video support. --verbose Turn on verbose build output + SIMH feature control options: + ----------------------------- + --nonetwork Build simulators without network support + --novideo Build simulators without video support + --no-aio-intrinsics + Do not use compiler/platform intrinsics to implement AIO + functions (aka "lock-free" AIO), reverts to lock-based AIO + if threading libraries are detected. + + Other options: + -------------- --help (-h) Print this help. ``` @@ -569,7 +576,7 @@ or video support. PS C:\...\open-simh> Get-Help -deatailed cmake\cmake-builder.ps1 NAME - C:\Users\bsm21317\play\open-simh\cmake\cmake-builder.ps1 + C:\...\play\open-simh\cmake\cmake-builder.ps1 SYNOPSIS Configure and build SIMH's dependencies and simulators using the Microsoft Visual @@ -577,9 +584,9 @@ or video support. 
SYNTAX - C:\Users\bsm21317\play\open-simh\cmake\cmake-builder.ps1 [[-flavor] ] [[-config] ] [[-cpack_suffix] ] [[-target] ] - [-clean] [-help] [-nonetwork] [-novideo] [-notest] [-noinstall] [-parallel] [-generate] [-regenerate] [-testonly] [-installOnly] [-windeprecation] - [-package] [-lto] [-debugWall] [-cppcheck] [] + C:\...\play\open-simh\cmake\cmake-builder.ps1 [[-flavor] ] [[-config] ] [[-cpack_suffix] ] [[-target] + ] [-clean] [-help] [-nonetwork] [-novideo] [-noaioinstrinsics] [-notest] [-noinstall] [-parallel] [-generate] [-testonly] + [-installOnly] [-windeprecation] [-lto] [-debugWall] [-cppcheck] [] DESCRIPTION @@ -588,9 +595,9 @@ or video support. 1. Configure and generate the build environment selected by '-flavor' option. 2. Build missing runtime dependencies and the simulator suite with the compiler - configuration selected by the '-config' option. The "Release" configuration - generates optimized executables; the "Debug" configuration generates - development executables with debugger information. + configuration selected by the '-config' option. The "Release" configuration + generates optimized executables; the "Debug" configuration generates + development executables with debugger information. 3. Test the simulators There is an install phase that can be invoked separately as part of the SIMH @@ -624,6 +631,9 @@ or video support. mingw-make MinGW GCC/mingw32-make mingw-ninja MinGW GCC/ninja + -config + The target build configuration. Valid values: "Release" and "Debug" + [...truncated for brevity...] 
``` diff --git a/cmake/cmake-builder.sh b/cmake/cmake-builder.sh index fc9015f1c..b4a3fffd2 100755 --- a/cmake/cmake-builder.sh +++ b/cmake/cmake-builder.sh @@ -7,8 +7,7 @@ showHelp() cat <a_next = (UNIT *) q; /* Mark as on list */ + } while (!sim_sync_cmpxchg((void * volatile *) &sim_asynch_queue, unit, (void *) q)); +} + int sim_aio_update_queue (void) { int migrated = 0; AIO_ILOCK; -if (AIO_QUEUE_VAL != QUEUE_LIST_END) { /* List !Empty */ - UNIT *q, *uptr; +if (!AIO_QUEUE_EMPTY()) { + volatile UNIT *q; + UNIT *uptr; int32 a_event_time; - do { /* Grab current queue */ - q = AIO_QUEUE_VAL; - } while (q != AIO_QUEUE_SET(QUEUE_LIST_END, q)); - while (q != QUEUE_LIST_END) { /* List !Empty */ - sim_debug (SIM_DBG_AIO_QUEUE, &sim_scp_dev, "Migrating Asynch event for %s after %d %s\n", sim_uname(q), q->a_event_time, sim_vm_interval_units); + for (q = aio_queue_worklist(); q != QUEUE_LIST_END; /* empty */) { + uptr = (UNIT *) q; + sim_debug (SIM_DBG_AIO_QUEUE, &sim_scp_dev, "Migrating Asynch event for %s after %d %s\n", + sim_uname(uptr), uptr->a_event_time, sim_vm_interval_units); ++migrated; - uptr = q; q = q->a_next; - uptr->a_next = NULL; /* hygiene */ + uptr->a_next = NULL; /* hygiene */ if (uptr->a_activate_call != &sim_activate_notbefore) { - a_event_time = uptr->a_event_time-((sim_asynch_inst_latency+1)/2); + a_event_time = uptr->a_event_time - ((sim_asynch_inst_latency + 1) / 2); if (a_event_time < 0) a_event_time = 0; } else a_event_time = uptr->a_event_time; - AIO_IUNLOCK; + uptr->a_activate_call (uptr, a_event_time); + if (uptr->a_check_completion) { sim_debug (SIM_DBG_AIO_QUEUE, &sim_scp_dev, "Calling Completion Check for asynch event on %s\n", sim_uname(uptr)); uptr->a_check_completion (uptr); } - AIO_ILOCK; } } AIO_IUNLOCK; @@ -423,22 +454,19 @@ return migrated; void sim_aio_activate (ACTIVATE_API caller, UNIT *uptr, int32 event_time) { -AIO_ILOCK; sim_debug (SIM_DBG_AIO_QUEUE, &sim_scp_dev, "Queueing Asynch event for %s after %d %s\n", sim_uname(uptr), 
event_time, sim_vm_interval_units); -if (uptr->a_next) { + +AIO_ILOCK; +if (NULL != uptr->a_next) { uptr->a_activate_call = sim_activate_abs; } else { - UNIT *q; uptr->a_event_time = event_time; uptr->a_activate_call = caller; - do { - q = AIO_QUEUE_VAL; - uptr->a_next = q; /* Mark as on list */ - } while (q != AIO_QUEUE_SET(uptr, q)); + aio_enqueue_unit(uptr); } AIO_IUNLOCK; -sim_asynch_check = 0; /* try to force check */ + if (sim_idle_wait) { sim_debug (TIMER_DBG_IDLE, &sim_timer_dev, "waking due to event on %s after %d %s\n", sim_uname(uptr), event_time, sim_vm_interval_units); pthread_cond_signal (&sim_asynch_wake); @@ -7036,7 +7064,7 @@ sim_show_clock_queues (st, dnotused, unotused, flag, cptr); pthread_mutex_lock (&sim_asynch_lock); sim_mfile = &buf; fprintf (st, "asynchronous pending event queue\n"); -if (sim_asynch_queue == QUEUE_LIST_END) +if (AIO_QUEUE_EMPTY()) fprintf (st, " Empty\n"); else { for (uptr = sim_asynch_queue; uptr != QUEUE_LIST_END; uptr = uptr->a_next) { @@ -13653,7 +13681,8 @@ if (sim_deb_switches & SWMASK ('F')) { /* filtering disabled? */ _debug_fwrite (buf, len); /* output now. */ return; /* done */ } -AIO_LOCK; + +AIO_DEBUG_IO_ACTIVE; if (debug_line_offset + len + 1 > debug_line_bufsize) { /* realloc(NULL, size) == malloc(size). Initialize the malloc()-ed space. 
Only need to test debug_line_buf since SIMH allocates both buffers at the same @@ -13738,7 +13767,7 @@ while (NULL != (eol = strchr (debug_line_buf, '\n')) || flush) { memmove (debug_line_buf, eol + 1, debug_line_offset); debug_line_buf[debug_line_offset] = '\0'; } -AIO_UNLOCK; +AIO_DEBUG_IO_DONE; } static void _sim_debug_write (const char *buf, size_t len) diff --git a/sim_defs.h b/sim_defs.h index 3954ca07f..bea483a70 100644 --- a/sim_defs.h +++ b/sim_defs.h @@ -1172,6 +1172,11 @@ extern int32 sim_asynch_check; extern int32 sim_asynch_latency; extern int32 sim_asynch_inst_latency; +/* Debug I/O serialization lock (sim_asynch_lock was used for debug I/O + * serialization, now separate for semantic clarity. Not performance + * critical. */ +extern pthread_mutex_t sim_debug_io_lock; + /* Thread local storage */ #if defined(thread_local) #define AIO_TLS thread_local @@ -1186,6 +1191,7 @@ extern int32 sim_asynch_inst_latency; /* It is primarily used only used in debugging messages */ #define AIO_TLS #endif + #define AIO_QUEUE_CHECK(que, lock) \ do { \ UNIT *_cptr; \ @@ -1207,10 +1213,14 @@ extern int32 sim_asynch_inst_latency; pthread_mutex_unlock (lock); \ } while (0) #define AIO_MAIN_THREAD (pthread_equal ( pthread_self(), sim_asynch_main_threadid )) -#define AIO_LOCK \ +#define AIO_LOCK \ pthread_mutex_lock(&sim_asynch_lock) -#define AIO_UNLOCK \ +#define AIO_UNLOCK \ pthread_mutex_unlock(&sim_asynch_lock) +#define AIO_DEBUG_IO_ACTIVE \ + pthread_mutex_lock(&sim_debug_io_lock) +#define AIO_DEBUG_IO_DONE \ + pthread_mutex_unlock(&sim_debug_io_lock) #define AIO_IS_ACTIVE(uptr) (((uptr)->a_is_active ? 
(uptr)->a_is_active (uptr) : FALSE) || ((uptr)->a_next)) #if defined(SIM_ASYNCH_MUX) #define AIO_CANCEL(uptr) \ @@ -1224,9 +1234,11 @@ extern int32 sim_asynch_inst_latency; #if !defined(AIO_CANCEL) #define AIO_CANCEL(uptr) #endif /* !defined(AIO_CANCEL) */ + #define AIO_EVENT_BEGIN(uptr) \ do { \ int __was_poll = uptr->dynflags & UNIT_TM_POLL + #define AIO_EVENT_COMPLETE(uptr, reason) \ if (__was_poll) { \ pthread_mutex_lock (&sim_tmxr_poll_lock); \ @@ -1242,65 +1254,6 @@ extern int32 sim_asynch_inst_latency; AIO_UPDATE_QUEUE; \ } while (0) -#if defined(__DECC_VER) -#include -#if defined(__IA64) -#define USE_AIO_INTRINSICS 1 -#endif -#endif -#if defined(_WIN32) || defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_4) || defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8) -#define USE_AIO_INTRINSICS 1 -#endif -/* Provide a way to test both Intrinsic and Lock based queue manipulations */ -/* when both are available on a particular platform */ -#if defined(DONT_USE_AIO_INTRINSICS) && defined(USE_AIO_INTRINSICS) -#undef USE_AIO_INTRINSICS -#endif -#ifdef USE_AIO_INTRINSICS -/* This approach uses intrinsics to manage access to the link list head */ -/* sim_asynch_queue. This implementation is a completely lock free design */ -/* which avoids the potential ABA issues. */ -#define AIO_QUEUE_MODE "Lock free asynchronous event queue" -#define AIO_INIT \ - do { \ - sim_asynch_main_threadid = pthread_self(); \ - /* Empty list/list end uses the point value (void *)1. 
\ - This allows NULL in an entry's a_next pointer to \ - indicate that the entry is not currently in any list */ \ - sim_asynch_queue = QUEUE_LIST_END; \ - } while (0) -#define AIO_CLEANUP \ - do { \ - pthread_mutex_destroy(&sim_asynch_lock); \ - pthread_cond_destroy(&sim_asynch_wake); \ - pthread_mutex_destroy(&sim_timer_lock); \ - pthread_cond_destroy(&sim_timer_wake); \ - pthread_mutex_destroy(&sim_tmxr_poll_lock); \ - pthread_cond_destroy(&sim_tmxr_poll_cond); \ - } while (0) -#ifdef _WIN32 -#elif defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_4) || defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8) -#define InterlockedCompareExchangePointer(Destination, Exchange, Comparand) __sync_val_compare_and_swap(Destination, Comparand, Exchange) -#elif defined(__DECC_VER) -#define InterlockedCompareExchangePointer(Destination, Exchange, Comparand) (void *)((int32)_InterlockedCompareExchange64(Destination, Exchange, Comparand)) -#else -#error "Implementation of function InterlockedCompareExchangePointer() is needed to build with USE_AIO_INTRINSICS" -#endif -#define AIO_ILOCK AIO_LOCK -#define AIO_IUNLOCK AIO_UNLOCK -#define AIO_QUEUE_VAL (UNIT *)(InterlockedCompareExchangePointer((void * volatile *)&sim_asynch_queue, (void *)sim_asynch_queue, NULL)) -#define AIO_QUEUE_SET(newval, oldval) (UNIT *)(InterlockedCompareExchangePointer((void * volatile *)&sim_asynch_queue, (void *)newval, oldval)) -#define AIO_UPDATE_QUEUE sim_aio_update_queue () -#define AIO_ACTIVATE(caller, uptr, event_time) \ - if (!pthread_equal ( pthread_self(), sim_asynch_main_threadid )) { \ - sim_aio_activate ((ACTIVATE_API)caller, uptr, event_time); \ - return SCPE_OK; \ - } else (void)0 -#else /* !USE_AIO_INTRINSICS */ -/* This approach uses a pthread mutex to manage access to the link list */ -/* head sim_asynch_queue. 
It will always work, but may be slower than the */ -/* lock free approach when using USE_AIO_INTRINSICS */ -#define AIO_QUEUE_MODE "Lock based asynchronous event queue" #define AIO_INIT \ do { \ pthread_mutexattr_t attr; \ @@ -1315,6 +1268,7 @@ extern int32 sim_asynch_inst_latency; indicate that the entry is not currently in any list */ \ sim_asynch_queue = QUEUE_LIST_END; \ } while (0) + #define AIO_CLEANUP \ do { \ pthread_mutex_destroy(&sim_asynch_lock); \ @@ -1323,54 +1277,176 @@ extern int32 sim_asynch_inst_latency; pthread_cond_destroy(&sim_timer_wake); \ pthread_mutex_destroy(&sim_tmxr_poll_lock); \ pthread_cond_destroy(&sim_tmxr_poll_cond); \ + pthread_mutex_destroy(&sim_debug_io_lock); \ } while (0) -#define AIO_ILOCK AIO_LOCK -#define AIO_IUNLOCK AIO_UNLOCK -#define AIO_QUEUE_VAL sim_asynch_queue -#define AIO_QUEUE_SET(newval, oldval) ((sim_asynch_queue = newval),oldval) + +/* Interior AIO locking: */ +#if !defined(DONT_USE_AIO_INTRINSICS) && \ + (defined(_WIN32) || \ + (defined(__ATOMIC_ACQ_REL) && defined(__ATOMIC_SEQ_CST) && defined(__ATOMIC_ACQUIRE)) || \ + (defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_4) || defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8)) || \ + (defined(__DECC_VER) && defined(_IA64))) +/* Atomic compare/exchange exists and is sufficient to manage the AIO queue across + * threads, no extra mutex locking needed. */ +# define AIO_ILOCK +# define AIO_IUNLOCK + +# define AIO_QUEUE_MODE "Thread-based asynchronous event queue with intrinsics" +# define AIO_MUTEX_ONLY 0 +#else +/* Acquire the sim_asynch_lock mutex to ensure exclusion when manipulating + * sim_asynch_queue. 
*/ +# define AIO_ILOCK AIO_LOCK +# define AIO_IUNLOCK AIO_UNLOCK + +# define AIO_QUEUE_MODE "Thread-based asynchronous event queue, mutex-only" +# define AIO_MUTEX_ONLY 1 +#endif + #define AIO_UPDATE_QUEUE sim_aio_update_queue () -#define AIO_ACTIVATE(caller, uptr, event_time) \ - if (!pthread_equal ( pthread_self(), sim_asynch_main_threadid )) { \ - sim_debug (SIM_DBG_AIO_QUEUE, sim_dflt_dev, "Queueing Asynch event for %s after %d instructions\n", sim_uname(uptr), event_time);\ - AIO_LOCK; \ - if (uptr->a_next) { /* already queued? */ \ - uptr->a_activate_call = sim_activate_abs; \ - } else { \ - uptr->a_next = sim_asynch_queue; \ - uptr->a_event_time = event_time; \ - uptr->a_activate_call = (ACTIVATE_API)&caller; \ - sim_asynch_queue = uptr; \ - } \ - if (sim_idle_wait) { \ - if (sim_deb) { /* only while debug do lock/unlock overhead */ \ - AIO_UNLOCK; \ - sim_debug (TIMER_DBG_IDLE, &sim_timer_dev, "waking due to event on %s after %d instructions\n", sim_uname(uptr), event_time);\ - AIO_LOCK; \ - } \ - pthread_cond_signal (&sim_asynch_wake); \ - } \ - AIO_UNLOCK; \ - sim_asynch_check = 0; \ - return SCPE_OK; \ + +#define AIO_ACTIVATE(caller, uptr, event_time) \ + if (!AIO_MAIN_THREAD) { \ + AIO_LOCK; \ + sim_aio_activate ((ACTIVATE_API)caller, uptr, event_time); \ + AIO_UNLOCK; \ + return SCPE_OK; \ } else (void)0 -#endif /* USE_AIO_INTRINSICS */ -#define AIO_VALIDATE(uptr) \ - if (!pthread_equal ( pthread_self(), sim_asynch_main_threadid )) { \ + +#define AIO_VALIDATE(uptr) \ + if (!AIO_MAIN_THREAD) { \ sim_printf("Improper thread context for operation on %s in %s line %d\n", \ - sim_uname(uptr), __FILE__, __LINE__); \ - abort(); \ - } else (void)0 -#define AIO_CHECK_EVENT \ - if (0 > --sim_asynch_check) { \ - AIO_UPDATE_QUEUE; \ - sim_asynch_check = sim_asynch_inst_latency; \ + sim_uname(uptr), __FILE__, __LINE__); \ + abort(); \ } else (void)0 + +#define AIO_CHECK_EVENT \ + do { \ + AIO_ILOCK; \ + if (0 > --sim_asynch_check || !AIO_QUEUE_EMPTY()) { \ + 
AIO_UPDATE_QUEUE; \ + sim_asynch_check = sim_asynch_inst_latency; \ + } \ + AIO_IUNLOCK; \ + } while (0); + #define AIO_SET_INTERRUPT_LATENCY(instpersec) \ do { \ sim_asynch_inst_latency = (int32)((((double)(instpersec))*sim_asynch_latency)/1000000000);\ if (sim_asynch_inst_latency == 0) \ sim_asynch_inst_latency = 1; \ } while (0) + +/*=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~*/ +/* Inline code hair: */ +/*=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~*/ +/* sim_sync_cmpxchg: Wrapper function around platform-dependent atomic compare/exchange + * primitives that enqueues src onto dest. + * + * Compares *dest to current, and stores src in dest if *dest == current. + * + * Returns: + * 0: *dest != current (failed, need to retry due to thread interference) + * 1: *dest == current (success) + */ +static SIM_INLINE int sim_sync_cmpxchg(void * volatile *dest, void *src, void *current) +{ +#if !AIO_MUTEX_ONLY +# if defined(__ATOMIC_ACQ_REL) && defined(__ATOMIC_SEQ_CST) && defined(__ATOMIC_ACQUIRE) + /* Newer GCC and Clang synchronization primitives: __atomic-s. These + * provide more control over read and write fences. */ +# if defined(__GNUC__) + return __atomic_compare_exchange(dest, &current, &src, 0, __ATOMIC_ACQ_REL, __ATOMIC_ACQUIRE); +# elif defined(__clang__) + /* Naturally, Clang/LLVM has a different __atomic_compare_exchange + * signature. */ + return __atomic_compare_exchange(sizeof(dest), dest, &current, &src, __ATOMIC_ACQ_REL, __ATOMIC_ACQUIRE); +# endif +# elif defined(_WIN32) + return (InterlockedCompareExchangePointer(dest, src, current) == current); +# elif defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_4) || defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8) + /* Older GCC (and Clang/LLVM) synchronization primitives. They still + * exist, but are implemented in terms of aliases for __atomic + * intrinsics. Use __atomic-s when available. 
*/ + + return __sync_bool_compare_and_swap(dest, current, src); +# elif defined(__DECC_VER) + return (_InterlockedCompareExchange64(dest, src, current) == current); +# endif +#else + /* No atomic compare/exchange or mutex-only. Potential issue for + * multicore platforms. */ + *dest = src; + return 1; +#endif +} + +/*=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~*/ +/* sim_sync_load_pointer: Wrapper function around platform-dependent atomic load + * intrinsics. + */ +static SIM_INLINE void *sim_sync_load_pointer(void * volatile *src) +{ +#if !AIO_MUTEX_ONLY +# if defined(__ATOMIC_ACQ_REL) && defined(__ATOMIC_SEQ_CST) && defined(__ATOMIC_ACQUIRE) + /* Newer GCC and Clang synchronization primitives. SIMH uses a sequential + * consistency fence to flush pending writes. Might be able to skate by + * with simple acquire fence; standards strongly suggest that acquire is + * always paired with release. */ + + void *retval; + +# if defined(__GNUC__) + __atomic_load(src, &retval, __ATOMIC_SEQ_CST); +# elif defined(__clang__) + __atomic_load(sizeof(src), src, &retval, __ATOMIC_SEQ_CST); +# else + retval = NULL; +# endif + + return retval; +# elif defined(_WIN32) +# if defined(_M_IX86) || defined(_M_X64) + /* Intel implements Total Store Ordering (TSO), which implies that aligned + * reads are atomic across cores so long as locked compare/exchange or + * other synchronized instructions store values. */ + return *src; +# else + /* For all other platforms: Win32 does not have an interlocked load, and + * the closest emulation is a compare/exchange that always fails against an + * impossible value. */ +# if defined(InterlockedCompareExchangePointerAcquire) + return InterlockedCompareExchangePointerAcquire(src, *src, NULL); +# else + return InterlockedCompareExchangePointer(src, *src, NULL); +# endif +# endif +# elif defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_4) || defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8) + /* Older GCC synchronization primitives. 
They still exist, but are + * implemented aliases for "__atomic" intrinsics. Note that there isn't a + * __sync load primitive, so SIMH is left with using a compare and swap + * that never succeeds, returning sim_asynch_queue's value. */ + + return __sync_val_compare_and_swap(src, NULL, *src); +# elif defined(__DECC_VER) + return _InterlockedCompareExchange64(src, *src, NULL); +# endif +#else + /* Not USE_AIO_INTRINSICS. Note there's no read fence here, which could + * be a potential problem on multicore platforms. */ + return (void *) *src; +#endif +} + +/*=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~*/ +/* AIO_QUEUE_EMPTY: Return true (1) if sim_asynch_queue is empty (points to + * QUEUE_LIST_END). + */ +static SIM_INLINE int AIO_QUEUE_EMPTY() +{ + return (sim_sync_load_pointer((void * volatile *) &sim_asynch_queue) == QUEUE_LIST_END); +} + #else /* !SIM_ASYNCH_IO */ #define AIO_QUEUE_MODE "Asynchronous I/O is not available" #define AIO_UPDATE_QUEUE @@ -1381,6 +1457,8 @@ extern int32 sim_asynch_inst_latency; #define AIO_MAIN_THREAD TRUE #define AIO_LOCK #define AIO_UNLOCK +#define AIO_DEBUG_IO_ACTIVE +#define AIO_DEBUG_IO_DONE #define AIO_CLEANUP #define AIO_EVENT_BEGIN(uptr) #define AIO_EVENT_COMPLETE(uptr, reason)