Skip to content

Commit

Permalink
Fully dynamic ETW controlled logging for ORT and QNN logs (microsoft#…
Browse files Browse the repository at this point in the history
…20537)

### Description
Windows - Fully dynamic ETW controlled logging for ORT and QNN logs

The logging support is documented here 
-
https://onnxruntime.ai/docs/performance/tune-performance/logging_tracing.html#tracing---windows
-
https://onnxruntime.ai/docs/performance/tune-performance/profiling-tools.html#tracelogging-etw-windows-profiling

Also add support for logging ORT SessionCreation on ETW CaptureState

### Motivation and Context
The previous ETW support only worked if you enabled ETW before the
session started. There can commonly be long-lived AI inference processes
that need to be traced & debugged. This enables logging fully on the
fly.

Without this support a dev would have to end up killing a process or
stopping a service in order to get tracing. We had to do this for a
recent issue with QNN, and it was a bit painful to get the logs and it
ruined the repro.

### Testing
I tested with the following cases
- Leaving default ORT run
- Enabling ETW prior to start and leaving running for entire session +
inferences, then stopping
- Starting ORT session + inf, then enabling and stopping ETW
  - Start ORT session /w long running Inferences 
- wpr -start
[ort.wprp](https://github.com/microsoft/onnxruntime/blob/e6228575e4d5866bdb831e76cc93e6c35af4de8b/ort.wprp#L4)
-start
[etw_provider.wprp](https://github.com/microsoft/onnxruntime/blob/e6228575e4d5866bdb831e76cc93e6c35af4de8b/onnxruntime/test/platform/windows/logging/etw_provider.wprp)
  - Wait a few seconds
  - wpr -stop ort.etl
  - Inferences are still running
- Verify ONNXRuntimeLogEvent provider events are present and new
SessionCreation_CaptureState event under Microsoft.ML.ONNXRuntime
provider

Related:
microsoft#18882
microsoft#19428
  • Loading branch information
ivberg authored Jun 7, 2024
1 parent f8b5c28 commit 74028e4
Show file tree
Hide file tree
Showing 19 changed files with 501 additions and 96 deletions.
7 changes: 6 additions & 1 deletion include/onnxruntime/core/common/logging/isink.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,15 @@
#include <string>

#include "core/common/logging/logging.h"
#include "core/common/logging/sink_types.h"

namespace onnxruntime {
namespace logging {
class ISink {
public:
ISink() = default;
explicit ISink(SinkType type = SinkType::BaseSink) : type_(type) {}

SinkType GetType() const { return type_; }

/**
Sends the message to the sink.
Expand All @@ -32,6 +35,8 @@ class ISink {
virtual ~ISink() = default;

private:
SinkType type_;

// Make Code Analysis happy by disabling all for now. Enable as needed.
ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(ISink);

Expand Down
32 changes: 26 additions & 6 deletions include/onnxruntime/core/common/logging/logging.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,10 @@
#include "core/common/common.h"
#include "core/common/profiler_common.h"
#include "core/common/logging/capture.h"
#include "core/common/logging/severity.h"

#include "core/common/logging/macros.h"

#include "core/common/logging/severity.h"
#include "core/common/logging/sink_types.h"
#include "core/platform/ort_mutex.h"
#include "date/date.h"

/*
Expand Down Expand Up @@ -167,6 +167,23 @@ class LoggingManager final {
*/
static bool HasDefaultLogger() { return nullptr != s_default_logger_; }

/**
Gets the default instance of the LoggingManager.
*/
static LoggingManager* GetDefaultInstance();

/**
Removes a Sink if one is present
*/
void RemoveSink(SinkType sinkType);

/**
Adds a Sink to the current sink creating a CompositeSink if necessary
Sinks types must be unique
@param severity The severity level for the new Sink
*/
bool AddSinkOfType(SinkType sinkType, std::function<std::unique_ptr<ISink>()> sinkFactory, logging::Severity severity);

/**
Change the minimum severity level for log messages to be output by the default logger.
@param severity The severity.
Expand Down Expand Up @@ -214,7 +231,10 @@ class LoggingManager final {
void CreateDefaultLogger(const std::string& logger_id);

std::unique_ptr<ISink> sink_;
const Severity default_min_severity_;
#ifdef _WIN32
mutable OrtMutex sink_mutex_;
#endif
Severity default_min_severity_;
const bool default_filter_user_data_;
const int default_max_vlog_level_;
bool owns_default_logger_;
Expand Down Expand Up @@ -362,8 +382,8 @@ unsigned int GetProcessId();
/**
If the ONNXRuntimeTraceLoggingProvider ETW Provider is enabled, then adds to the existing logger.
*/
std::unique_ptr<ISink> EnhanceLoggerWithEtw(std::unique_ptr<ISink> existingLogger, logging::Severity originalSeverity,
logging::Severity etwSeverity);
std::unique_ptr<ISink> EnhanceSinkWithEtw(std::unique_ptr<ISink> existingSink, logging::Severity originalSeverity,
logging::Severity etwSeverity);

/**
If the ONNXRuntimeTraceLoggingProvider ETW Provider is enabled, then can override the logging level.
Expand Down
11 changes: 11 additions & 0 deletions include/onnxruntime/core/common/logging/sink_types.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
#pragma once

namespace onnxruntime {
namespace logging {
enum class SinkType {
BaseSink,
CompositeSink,
EtwSink
};
} // namespace logging
} // namespace onnxruntime
72 changes: 60 additions & 12 deletions onnxruntime/core/common/logging/logging.cc
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,11 @@
#include "core/common/exceptions.h"
#include "core/common/logging/isink.h"
#include "core/common/logging/logging.h"
#include "core/common/logging/sinks/composite_sink.h"

#ifdef _WIN32
#include <Windows.h>
#include "core/platform/windows/logging/etw_sink.h"
#include "core/common/logging/sinks/composite_sink.h"
#else
#include <unistd.h>
#if defined(__MACH__) || defined(__wasm__) || defined(_AIX)
Expand All @@ -22,10 +22,10 @@
#include <sys/syscall.h>
#endif
#endif
#include "core/platform/ort_mutex.h"

#if __FreeBSD__
#include <sys/thr.h> // Use thr_self() syscall under FreeBSD to get thread id
#include "logging.h"
#endif

namespace onnxruntime {
Expand All @@ -52,6 +52,10 @@ static std::atomic<void*>& DefaultLoggerManagerInstance() noexcept {
return default_instance;
}

LoggingManager* LoggingManager::GetDefaultInstance() {
return static_cast<LoggingManager*>(DefaultLoggerManagerInstance().load());
}

// GSL_SUPRESS(i.22) is broken. Ignore the warnings for the static local variables that are trivial
// and should not have any destruction order issues via pragmas instead.
// https://developercommunity.visualstudio.com/content/problem/249706/gslsuppress-does-not-work-for-i22-c-core-guideline.html
Expand All @@ -66,6 +70,7 @@ static OrtMutex& DefaultLoggerMutex() noexcept {
}

Logger* LoggingManager::s_default_logger_ = nullptr;
OrtMutex sink_mutex_;

#ifdef _MSC_VER
#pragma warning(pop)
Expand Down Expand Up @@ -245,35 +250,78 @@ unsigned int GetProcessId() {
#endif
}

std::unique_ptr<ISink> EnhanceLoggerWithEtw(std::unique_ptr<ISink> existingLogger, logging::Severity originalSeverity,
logging::Severity etwSeverity) {
std::unique_ptr<ISink> EnhanceSinkWithEtw(std::unique_ptr<ISink> existing_sink, logging::Severity original_severity,
logging::Severity etw_severity) {
#ifdef _WIN32
auto& manager = EtwRegistrationManager::Instance();
if (manager.IsEnabled()) {
auto compositeSink = std::make_unique<CompositeSink>();
compositeSink->AddSink(std::move(existingLogger), originalSeverity);
compositeSink->AddSink(std::make_unique<EtwSink>(), etwSeverity);
compositeSink->AddSink(std::move(existing_sink), original_severity);
compositeSink->AddSink(std::make_unique<EtwSink>(), etw_severity);
return compositeSink;
} else {
return existingLogger;
return existing_sink;
}
#else
// On non-Windows platforms, just return the existing logger
(void)originalSeverity;
(void)etwSeverity;
return existingLogger;
(void)original_severity;
(void)etw_severity;
return existing_sink;
#endif // _WIN32
}

Severity OverrideLevelWithEtw(Severity originalSeverity) {
Severity OverrideLevelWithEtw(Severity original_severity) {
#ifdef _WIN32
auto& manager = logging::EtwRegistrationManager::Instance();
if (manager.IsEnabled() &&
(manager.Keyword() & static_cast<ULONGLONG>(onnxruntime::logging::ORTTraceLoggingKeyword::Logs)) != 0) {
return manager.MapLevelToSeverity();
}
#endif // _WIN32
return originalSeverity;
return original_severity;
}

bool LoggingManager::AddSinkOfType(SinkType sink_type, std::function<std::unique_ptr<ISink>()> sinkFactory,
logging::Severity severity) {
std::lock_guard<OrtMutex> guard(sink_mutex_);
if (sink_->GetType() != SinkType::CompositeSink) {
// Current sink is not a composite, create a new composite sink and add the current sink to it
auto new_composite = std::make_unique<CompositeSink>();
new_composite->AddSink(std::move(sink_), default_min_severity_); // Move the current sink into the new composite
sink_ = std::move(new_composite); // Now sink_ is pointing to the new composite
}
// Adjust the default minimum severity level to accommodate new sink needs
default_min_severity_ = std::min(default_min_severity_, severity);
if (s_default_logger_ != nullptr) {
s_default_logger_->SetSeverity(default_min_severity_);
}
CompositeSink* current_composite = static_cast<CompositeSink*>(sink_.get());
if (current_composite->HasType(sink_type)) {
return false; // Sink of this type already exists, do not add another
}

current_composite->AddSink(sinkFactory(), severity);
return true;
}

void LoggingManager::RemoveSink(SinkType sink_type) {
std::lock_guard<OrtMutex> guard(sink_mutex_);

if (sink_->GetType() == SinkType::CompositeSink) {
auto composite_sink = static_cast<CompositeSink*>(sink_.get());

Severity newSeverity = composite_sink->RemoveSink(sink_type);

if (composite_sink->HasOnlyOneSink()) {
// If only one sink remains, replace the CompositeSink with this single sink
sink_ = composite_sink->GetRemoveSingleSink();
}

default_min_severity_ = newSeverity;
if (s_default_logger_ != nullptr) {
s_default_logger_->SetSeverity(default_min_severity_);
}
}
}

} // namespace logging
Expand Down
57 changes: 52 additions & 5 deletions onnxruntime/core/common/logging/sinks/composite_sink.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,17 @@ class CompositeSink : public ISink {
/// Initializes a new instance of the <see cref="CompositeSink"/> class.
/// Use AddSink to add sinks.
/// </summary>
CompositeSink() {}
CompositeSink() : ISink(SinkType::CompositeSink) {}

/// <summary>
/// Check if the composite sink contains a sink of the specified type.
/// </summary>
bool HasType(SinkType sink_type) const {
return std::any_of(sinks_with_severity_.begin(), sinks_with_severity_.end(),
[&](const auto& sink_pair) {
return sink_pair.first->GetType() == sink_type;
});
}

/// <summary>
/// Adds a sink. Takes ownership of the sink (so pass unique_ptr by value).
Expand All @@ -37,11 +47,48 @@ class CompositeSink : public ISink {
}

/// <summary>
/// Gets a const reference to the collection of sinks and min severity for that sink
/// Remove a sink of the specified type.
/// </summary>
/// <param name="sink_type">Sink type to remove</param>
/// <returns>Minimum severity of the remaining sinks</returns>
logging::Severity RemoveSink(SinkType sink_type) {
logging::Severity severity = Severity::kFATAL; // default if we end up with no sinks

// find entries to remove and the minimum severity of the remaining sinks
auto entries_to_remove = std::remove_if(sinks_with_severity_.begin(), sinks_with_severity_.end(),
[&](const auto& entry) {
if (entry.first->GetType() == sink_type) {
return true;
} else {
severity = std::min(severity, entry.second);
return false;
}
});

sinks_with_severity_.erase(entries_to_remove, sinks_with_severity_.end());

return severity;
}

/// <summary>
/// Check if there's only one sink left
/// </summary>
/// <returns> True if only 1 sink remaining </returns>
bool HasOnlyOneSink() const {
return sinks_with_severity_.size() == 1;
}

/// <summary>
/// If one sink is remaining then returns it and empties the composite sink
/// </summary>
/// <returns>A const reference to the vector pair of unique_ptr to ISink and severity.</returns>
const std::vector<std::pair<std::unique_ptr<ISink>, logging::Severity>>& GetSinks() const {
return sinks_with_severity_;
/// <returns> If one sink remains then returns the sink, otherwise nullptr </returns>
std::unique_ptr<ISink> GetRemoveSingleSink() {
if (HasOnlyOneSink()) {
auto single_sink = std::move(sinks_with_severity_.begin()->first);
sinks_with_severity_.clear();
return single_sink;
}
return nullptr;
}

private:
Expand Down
3 changes: 2 additions & 1 deletion onnxruntime/core/platform/telemetry.cc
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ void Telemetry::LogSessionCreation(uint32_t session_id, int64_t ir_version, cons
const std::string& model_graph_name,
const std::unordered_map<std::string, std::string>& model_metadata,
const std::string& loadedFrom, const std::vector<std::string>& execution_provider_ids,
bool use_fp16) const {
bool use_fp16, bool captureState) const {
ORT_UNUSED_PARAMETER(session_id);
ORT_UNUSED_PARAMETER(ir_version);
ORT_UNUSED_PARAMETER(model_producer_name);
Expand All @@ -67,6 +67,7 @@ void Telemetry::LogSessionCreation(uint32_t session_id, int64_t ir_version, cons
ORT_UNUSED_PARAMETER(loadedFrom);
ORT_UNUSED_PARAMETER(execution_provider_ids);
ORT_UNUSED_PARAMETER(use_fp16);
ORT_UNUSED_PARAMETER(captureState);
}

void Telemetry::LogRuntimeError(uint32_t session_id, const common::Status& status, const char* file,
Expand Down
2 changes: 1 addition & 1 deletion onnxruntime/core/platform/telemetry.h
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ class Telemetry {
const std::string& model_graph_name,
const std::unordered_map<std::string, std::string>& model_metadata,
const std::string& loadedFrom, const std::vector<std::string>& execution_provider_ids,
bool use_fp16) const;
bool use_fp16, bool captureState) const;

virtual void LogRuntimeError(uint32_t session_id, const common::Status& status, const char* file,
const char* function, uint32_t line) const;
Expand Down
2 changes: 1 addition & 1 deletion onnxruntime/core/platform/windows/logging/etw_sink.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ namespace logging {

class EtwSink : public ISink {
public:
EtwSink() = default;
EtwSink() : ISink(SinkType::EtwSink) {}
~EtwSink() = default;

constexpr static const char* kEventName = "ONNXRuntimeLogEvent";
Expand Down
Loading

0 comments on commit 74028e4

Please sign in to comment.