From f227ddf58f12afd81663a5cf3a9421b6fd87a49a Mon Sep 17 00:00:00 2001 From: Sheil Kumar Date: Wed, 31 Jul 2024 08:59:55 -0700 Subject: [PATCH] Fix ETW trace logging crash in multithreading situations (#21566) ### Description ETW trace logger is fakely registered as initialized_ is marked as true before the registration is done, causing crashing issue for Lenovo camera application. A prior attempt to address was made here: https://github.com/microsoft/onnxruntime/pull/21226 It was reverted here: https://github.com/microsoft/onnxruntime/pull/21360 ### Motivation and Context The problem is that during initialization of TraceLoggingRegisterEx, it will reinvoke the callback and attempt reinitialization, which is not allowed. TraceLoggingRegisterEx however can be initialized concurrently when initialization happens on multiple threads. For these reasons it needs to be protected by a lock, but the lock cannot naively block because the callback's reinvocation will cause a deadlock. To solve this problem another tracking variable is added : "initializing" which protects against reinitialization during the first initialization. --------- Co-authored-by: Sheil Kumar --- .../core/platform/windows/logging/etw_sink.cc | 27 +++++++++++++++---- .../core/platform/windows/logging/etw_sink.h | 7 ++++- 2 files changed, 28 insertions(+), 6 deletions(-) diff --git a/onnxruntime/core/platform/windows/logging/etw_sink.cc b/onnxruntime/core/platform/windows/logging/etw_sink.cc index b0f9eaf4f62d2..ef42c88a67ba6 100644 --- a/onnxruntime/core/platform/windows/logging/etw_sink.cc +++ b/onnxruntime/core/platform/windows/logging/etw_sink.cc @@ -137,28 +137,45 @@ void NTAPI EtwRegistrationManager::ORT_TL_EtwEnableCallback( EtwRegistrationManager::~EtwRegistrationManager() { std::lock_guard lock(callbacks_mutex_); callbacks_.clear(); - ::TraceLoggingUnregister(etw_provider_handle); + if (initialization_status_ == InitializationStatus::Initialized || + initialization_status_ == InitializationStatus::Initializing) { + std::lock_guard init_lock(init_mutex_); + assert(initialization_status_ != InitializationStatus::Initializing); + if (initialization_status_ == InitializationStatus::Initialized) { + ::TraceLoggingUnregister(etw_provider_handle); + initialization_status_ = InitializationStatus::NotInitialized; + } + } } EtwRegistrationManager::EtwRegistrationManager() { } -void EtwRegistrationManager::LazyInitialize() { - if (!initialized_) { +void EtwRegistrationManager::LazyInitialize() try { + if (initialization_status_ == InitializationStatus::NotInitialized) { std::lock_guard lock(init_mutex_); - if (!initialized_) { // Double-check locking pattern - initialized_ = true; + if (initialization_status_ == InitializationStatus::NotInitialized) { // Double-check locking pattern + initialization_status_ = InitializationStatus::Initializing; etw_status_ = ::TraceLoggingRegisterEx(etw_provider_handle, ORT_TL_EtwEnableCallback, nullptr); if (FAILED(etw_status_)) { ORT_THROW("ETW registration failed. Logging will be broken: " + std::to_string(etw_status_)); } + initialization_status_ = InitializationStatus::Initialized; } } +} catch (...) { + initialization_status_ = InitializationStatus::Failed; + throw; } void EtwRegistrationManager::InvokeCallbacks(LPCGUID SourceId, ULONG IsEnabled, UCHAR Level, ULONGLONG MatchAnyKeyword, ULONGLONG MatchAllKeyword, PEVENT_FILTER_DESCRIPTOR FilterData, PVOID CallbackContext) { + if (initialization_status_ != InitializationStatus::Initialized) { + // Drop messages until manager is fully initialized. + return; + } + std::lock_guard lock(callbacks_mutex_); for (const auto& callback : callbacks_) { (*callback)(SourceId, IsEnabled, Level, MatchAnyKeyword, MatchAllKeyword, FilterData, CallbackContext); diff --git a/onnxruntime/core/platform/windows/logging/etw_sink.h b/onnxruntime/core/platform/windows/logging/etw_sink.h index 3af45b813a625..d6c9ea27b2955 100644 --- a/onnxruntime/core/platform/windows/logging/etw_sink.h +++ b/onnxruntime/core/platform/windows/logging/etw_sink.h @@ -47,6 +47,11 @@ class EtwSink : public ISink { }; class EtwRegistrationManager { + enum class InitializationStatus { NotInitialized, + Initializing, + Initialized, + Failed }; + public: using EtwInternalCallback = std::function