From 029368a9886bb3a653e18ac2ffe4460ce64a3cac Mon Sep 17 00:00:00 2001 From: Sheil Kumar Date: Wed, 31 Jul 2024 08:59:55 -0700 Subject: [PATCH] Fix ETW trace logging crash in multithreading situations (#21566) ETW trace logger is fakely registered as initialized_ is marked as true before the registration is done, causing crashing issue for Lenovo camera application. A prior attempt to address was made here: https://github.com/microsoft/onnxruntime/pull/21226 It was reverted here: https://github.com/microsoft/onnxruntime/pull/21360 The problem is that during initialization of TraceLoggingRegisterEx, it will reinvoke the callback and attempt reinitialization, which is not allowed. TraceLoggingRegisterEx however can be initialized concurrently when initialization happens on multiple threads. For these reasons it needs to be protected by a lock, but the lock cannot naively block because the callback's reinvocation will cause a deadlock. To solve this problem another tracking variable is added : "initializing" which protects against reinitialization during the first initialization. --------- Co-authored-by: Sheil Kumar --- .../core/platform/windows/logging/etw_sink.cc | 29 +++++++++++++++---- .../core/platform/windows/logging/etw_sink.h | 7 ++++- 2 files changed, 30 insertions(+), 6 deletions(-) diff --git a/onnxruntime/core/platform/windows/logging/etw_sink.cc b/onnxruntime/core/platform/windows/logging/etw_sink.cc index 5fb7f7a65161d..37fb7c635249d 100644 --- a/onnxruntime/core/platform/windows/logging/etw_sink.cc +++ b/onnxruntime/core/platform/windows/logging/etw_sink.cc @@ -126,28 +126,47 @@ void NTAPI EtwRegistrationManager::ORT_TL_EtwEnableCallback( } EtwRegistrationManager::~EtwRegistrationManager() { - ::TraceLoggingUnregister(etw_provider_handle); + std::lock_guard lock(callbacks_mutex_); + callbacks_.clear(); + if (initialization_status_ == InitializationStatus::Initialized || + initialization_status_ == InitializationStatus::Initializing) { + std::lock_guard init_lock(init_mutex_); + assert(initialization_status_ != InitializationStatus::Initializing); + if (initialization_status_ == InitializationStatus::Initialized) { + ::TraceLoggingUnregister(etw_provider_handle); + initialization_status_ = InitializationStatus::NotInitialized; + } + } } EtwRegistrationManager::EtwRegistrationManager() { } -void EtwRegistrationManager::LazyInitialize() { - if (!initialized_) { +void EtwRegistrationManager::LazyInitialize() try { + if (initialization_status_ == InitializationStatus::NotInitialized) { std::lock_guard lock(init_mutex_); - if (!initialized_) { // Double-check locking pattern - initialized_ = true; + if (initialization_status_ == InitializationStatus::NotInitialized) { // Double-check locking pattern + initialization_status_ = InitializationStatus::Initializing; etw_status_ = ::TraceLoggingRegisterEx(etw_provider_handle, ORT_TL_EtwEnableCallback, nullptr); if (FAILED(etw_status_)) { ORT_THROW("ETW registration failed. Logging will be broken: " + std::to_string(etw_status_)); } + initialization_status_ = InitializationStatus::Initialized; } } +} catch (...) { + initialization_status_ = InitializationStatus::Failed; + throw; } void EtwRegistrationManager::InvokeCallbacks(LPCGUID SourceId, ULONG IsEnabled, UCHAR Level, ULONGLONG MatchAnyKeyword, ULONGLONG MatchAllKeyword, PEVENT_FILTER_DESCRIPTOR FilterData, PVOID CallbackContext) { + if (initialization_status_ != InitializationStatus::Initialized) { + // Drop messages until manager is fully initialized. + return; + } + std::lock_guard lock(callbacks_mutex_); for (const auto& callback : callbacks_) { callback(SourceId, IsEnabled, Level, MatchAnyKeyword, MatchAllKeyword, FilterData, CallbackContext); diff --git a/onnxruntime/core/platform/windows/logging/etw_sink.h b/onnxruntime/core/platform/windows/logging/etw_sink.h index 143c3fcfdfc52..42f09eac3fe7f 100644 --- a/onnxruntime/core/platform/windows/logging/etw_sink.h +++ b/onnxruntime/core/platform/windows/logging/etw_sink.h @@ -47,6 +47,11 @@ class EtwSink : public ISink { }; class EtwRegistrationManager { + enum class InitializationStatus { NotInitialized, + Initializing, + Initialized, + Failed }; + public: using EtwInternalCallback = std::function