Skip to content

Commit

Permalink
Fix crash in QNN EP - ResetQnnLogLevel (#22456)
Browse files Browse the repository at this point in the history
### Description
Fix crash with extra checks ResetQnnLogLevel. 

From the dump it looks like during ETW callbacks, while the provider is stopping, we attempt to reset the QNN log level.
While the QNN BackEndMgr (this) is alive logger_ is not valid

### Motivation and Context
ORT should not crash
  • Loading branch information
ivberg authored Oct 23, 2024
1 parent ba40022 commit 0028d3f
Show file tree
Hide file tree
Showing 4 changed files with 63 additions and 48 deletions.
16 changes: 13 additions & 3 deletions onnxruntime/core/providers/qnn/builder/qnn_backend_manager.cc
Original file line number Diff line number Diff line change
Expand Up @@ -302,13 +302,21 @@ QnnLog_Level_t QnnBackendManager::MapOrtSeverityToQNNLogLevel(logging::Severity
}

Status QnnBackendManager::ResetQnnLogLevel() {
auto ort_log_level = logger_->GetSeverity();
LOGS(*logger_, INFO) << "Reset Qnn log level to ORT Logger level: " << (unsigned int)ort_log_level;
return UpdateQnnLogLevel(ort_log_level);
std::lock_guard<std::mutex> lock(logger_mutex_);

if (backend_setup_completed_ && logger_ != nullptr) {
auto ort_log_level = logger_->GetSeverity();
LOGS(*logger_, INFO) << "Reset Qnn log level to ORT Logger level: " << (unsigned int)ort_log_level;
return UpdateQnnLogLevel(ort_log_level);
}
return Status::OK();
}

Status QnnBackendManager::UpdateQnnLogLevel(logging::Severity ort_log_level) {
ORT_RETURN_IF(nullptr == log_handle_, "Unable to update QNN Log Level. Invalid QNN log handle.");
ORT_RETURN_IF(false == backend_setup_completed_, "Unable to update QNN Log Level. Backend setup not completed.");
ORT_RETURN_IF(nullptr == logger_, "Unable to update QNN Log Level. Invalid logger.");

QnnLog_Level_t qnn_log_level = MapOrtSeverityToQNNLogLevel(ort_log_level);

LOGS(*logger_, INFO) << "Updating Qnn log level to: " << qnn_log_level;
Expand Down Expand Up @@ -686,6 +694,7 @@ Status QnnBackendManager::LoadCachedQnnContextFromBuffer(char* buffer, uint64_t
}

Status QnnBackendManager::SetupBackend(const logging::Logger& logger, bool load_from_cached_context) {
std::lock_guard<std::mutex> lock(logger_mutex_);
if (backend_setup_completed_) {
LOGS(logger, VERBOSE) << "Backend setup already!";
return Status::OK();
Expand Down Expand Up @@ -972,6 +981,7 @@ void QnnBackendManager::ReleaseResources() {
ORT_THROW("Failed to ShutdownBackend.");
}

std::lock_guard<std::mutex> lock(logger_mutex_);
result = TerminateQnnLog();
if (Status::OK() != result) {
ORT_THROW("Failed to TerminateQnnLog.");
Expand Down
3 changes: 3 additions & 0 deletions onnxruntime/core/providers/qnn/builder/qnn_backend_manager.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,11 @@
#endif

#include <memory>
#include <mutex>
#include <string>
#include <unordered_map>
#include <vector>

#include "HTP/QnnHtpDevice.h"
#include "QnnLog.h"
#include "QnnTypes.h"
Expand Down Expand Up @@ -233,6 +235,7 @@ class QnnBackendManager {

private:
const std::string backend_path_;
std::mutex logger_mutex_;
const logging::Logger* logger_ = nullptr;
QNN_INTERFACE_VER_TYPE qnn_interface_ = QNN_INTERFACE_VER_TYPE_INIT;
QNN_SYSTEM_INTERFACE_VER_TYPE qnn_sys_interface_ = QNN_SYSTEM_INTERFACE_VER_TYPE_INIT;
Expand Down
90 changes: 46 additions & 44 deletions onnxruntime/core/providers/qnn/qnn_execution_provider.cc
Original file line number Diff line number Diff line change
Expand Up @@ -258,49 +258,6 @@ QNNExecutionProvider::QNNExecutionProvider(const ProviderOptions& provider_optio
}
}

#ifdef _WIN32
auto& etwRegistrationManager = logging::EtwRegistrationManager::Instance();
// Register callback for ETW capture state (rundown)
callback_ETWSink_provider_ = onnxruntime::logging::EtwRegistrationManager::EtwInternalCallback(
[&etwRegistrationManager, this](
LPCGUID SourceId,
ULONG IsEnabled,
UCHAR Level,
ULONGLONG MatchAnyKeyword,
ULONGLONG MatchAllKeyword,
PEVENT_FILTER_DESCRIPTOR FilterData,
PVOID CallbackContext) {
ORT_UNUSED_PARAMETER(SourceId);
ORT_UNUSED_PARAMETER(MatchAnyKeyword);
ORT_UNUSED_PARAMETER(MatchAllKeyword);
ORT_UNUSED_PARAMETER(FilterData);
ORT_UNUSED_PARAMETER(CallbackContext);

if (IsEnabled == EVENT_CONTROL_CODE_ENABLE_PROVIDER) {
if ((MatchAnyKeyword & static_cast<ULONGLONG>(onnxruntime::logging::ORTTraceLoggingKeyword::Logs)) != 0) {
auto ortETWSeverity = etwRegistrationManager.MapLevelToSeverity();
(void)qnn_backend_manager_->UpdateQnnLogLevel(ortETWSeverity);
}
if ((MatchAnyKeyword & static_cast<ULONGLONG>(onnxruntime::logging::ORTTraceLoggingKeyword::Profiling)) != 0) {
if (Level != 0) {
// Commenting out Dynamic QNN Profiling for now
// There seems to be a crash in 3rd party QC QnnHtp.dll with this.
// Repro Scenario - start ETW tracing prior to session creation.
// Then disable/enable ETW Tracing with the code below uncommented a few times
// auto profiling_level_etw = GetProfilingLevelFromETWLevel(Level);
// (void)qnn_backend_manager_->SetProfilingLevelETW(profiling_level_etw);
}
}
}

if (IsEnabled == EVENT_CONTROL_CODE_DISABLE_PROVIDER) {
// (void)qnn_backend_manager_->SetProfilingLevelETW(qnn::ProfilingLevel::INVALID);
(void)qnn_backend_manager_->ResetQnnLogLevel();
}
});
etwRegistrationManager.RegisterInternalCallback(callback_ETWSink_provider_);
#endif

// In case ETW gets disabled later
auto profiling_level_pos = provider_options_map.find(PROFILING_LEVEL);
if (profiling_level_pos != provider_options_map.end()) {
Expand Down Expand Up @@ -440,6 +397,49 @@ QNNExecutionProvider::QNNExecutionProvider(const ProviderOptions& provider_optio
htp_arch,
soc_model,
enable_htp_weight_sharing_);

#ifdef _WIN32
auto& etwRegistrationManager = logging::EtwRegistrationManager::Instance();
// Register callback for ETW capture state (rundown)
callback_ETWSink_provider_ = onnxruntime::logging::EtwRegistrationManager::EtwInternalCallback(
[&etwRegistrationManager, this](
LPCGUID SourceId,
ULONG IsEnabled,
UCHAR Level,
ULONGLONG MatchAnyKeyword,
ULONGLONG MatchAllKeyword,
PEVENT_FILTER_DESCRIPTOR FilterData,
PVOID CallbackContext) {
ORT_UNUSED_PARAMETER(SourceId);
ORT_UNUSED_PARAMETER(MatchAnyKeyword);
ORT_UNUSED_PARAMETER(MatchAllKeyword);
ORT_UNUSED_PARAMETER(FilterData);
ORT_UNUSED_PARAMETER(CallbackContext);

if (IsEnabled == EVENT_CONTROL_CODE_ENABLE_PROVIDER) {
if ((MatchAnyKeyword & static_cast<ULONGLONG>(onnxruntime::logging::ORTTraceLoggingKeyword::Logs)) != 0) {
auto ortETWSeverity = etwRegistrationManager.MapLevelToSeverity();
(void)qnn_backend_manager_->UpdateQnnLogLevel(ortETWSeverity);
}
if ((MatchAnyKeyword & static_cast<ULONGLONG>(onnxruntime::logging::ORTTraceLoggingKeyword::Profiling)) != 0) {
if (Level != 0) {
// Commenting out Dynamic QNN Profiling for now
// There seems to be a crash in 3rd party QC QnnHtp.dll with this.
// Repro Scenario - start ETW tracing prior to session creation.
// Then disable/enable ETW Tracing with the code below uncommented a few times
// auto profiling_level_etw = GetProfilingLevelFromETWLevel(Level);
// (void)qnn_backend_manager_->SetProfilingLevelETW(profiling_level_etw);
}
}
}

if (IsEnabled == EVENT_CONTROL_CODE_DISABLE_PROVIDER) {
// (void)qnn_backend_manager_->SetProfilingLevelETW(qnn::ProfilingLevel::INVALID);
(void)qnn_backend_manager_->ResetQnnLogLevel();
}
});
etwRegistrationManager.RegisterInternalCallback(callback_ETWSink_provider_);
#endif
}

QNNExecutionProvider::~QNNExecutionProvider() {
Expand All @@ -453,7 +453,9 @@ QNNExecutionProvider::~QNNExecutionProvider() {

// Unregister the ETW callback
#ifdef _WIN32
logging::EtwRegistrationManager::Instance().UnregisterInternalCallback(callback_ETWSink_provider_);
if (callback_ETWSink_provider_ != nullptr) {
logging::EtwRegistrationManager::Instance().UnregisterInternalCallback(callback_ETWSink_provider_);
}
#endif
}

Expand Down
2 changes: 1 addition & 1 deletion onnxruntime/core/providers/qnn/qnn_execution_provider.h
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,7 @@ class QNNExecutionProvider : public IExecutionProvider {
bool enable_HTP_FP16_precision_ = true;
bool share_ep_contexts_ = false;
#ifdef _WIN32
onnxruntime::logging::EtwRegistrationManager::EtwInternalCallback callback_ETWSink_provider_;
onnxruntime::logging::EtwRegistrationManager::EtwInternalCallback callback_ETWSink_provider_ = nullptr;
#endif
qnn::ModelSettings model_settings_ = {};

Expand Down

0 comments on commit 0028d3f

Please sign in to comment.