Cherry-pick for 1.17.1 patch release #19477

Merged on Feb 21, 2024 (23 commits).

Commits (the diff below reflects changes from 8 of the 23 commits):
506eddb  Whisper Crash Fix (#19345) - petermcaughan, Jan 31, 2024
6e61306  Fix Split index bugs uncovered by QNN SDK 2.19 (#19381) - adrianlizarraga, Feb 2, 2024
ad63507  [DML EP] Fix external data unpacking (#19415) - PatriceVignola, Feb 7, 2024
a77ee4a  Add contrib Q/DQ ops to symbolic shape inference tool (#19340) - adrianlizarraga, Jan 31, 2024
5269e93  [Quant tool] Ensure MSFT opset for Q/DQ models (#19335) - adrianlizarraga, Jan 31, 2024
c1ce74d  Windows - Only set thread affinity on Server with auto affinity (#19318) - ivberg, Jan 30, 2024
098ef2c  [js/web] fix types exports in package.json (#19458) - fs-eire, Feb 8, 2024
f5f5cc8  Add capturestate / rundown ETW support logging for session and provid… - ivberg, Feb 8, 2024
e02b783  Disable streams for the DML EP (#19481) - PatriceVignola, Feb 10, 2024
14543de  Remove cuda gencode 90 to reduce onnxruntime-training package size (#… - baijumeswani, Feb 12, 2024
605adb0  Ovep 1.17.1 (#19482) - preetha-intel, Feb 12, 2024
27c0a2f  [QNN EP] Build x64 python wheel for QNN EP (#19499) - adrianlizarraga, Feb 13, 2024
61730bd  Fix subgraph quantization regression in onnxruntime 1.17 (#19421) - fxmarty, Feb 13, 2024
166488e  Restrict L2 Cache Core check to Intel devices (#19483) - smk2007, Feb 14, 2024
ad02db8  Update the default std flag used during torch extensions compilation … - baijumeswani, Feb 14, 2024
4917fff  add ATen support for bicubic interpolation (#19380) - prathikr, Feb 5, 2024
34c3623  Optimize KahnsTopologicalSort and PriorityNodeCompare (#19475) - smk2007, Feb 16, 2024
ad86d13  Support ONNX export of OpenAi Whisper model (#17316) - shubhambhokare1, Feb 9, 2024
485e17e  Whisper Timestamps and Temperature (#19509) - kunal-vaishnavi, Feb 16, 2024
e79a06b  Enable DML on Windows and CUDA on Linux for Node.js binding (#19274) - jchen351, Feb 5, 2024
e96506e  add option DefaultTensorType to specify the default tensor type to qu… - xadupre, Feb 20, 2024
1aa73b2  Disable __cpuid check on arm64 builds as intrinsic is not available (… - smk2007, Feb 20, 2024
d636587  Changed command line argpasrse to process '--symmetric [True|False]'.… - satyajandhyala, Feb 21, 2024
js/web/package.json (10 additions, 0 deletions)

@@ -69,46 +69,56 @@
"exports": {
".": {
"node": "./dist/ort.node.min.js",
"types": "./types.d.ts",
"default": {
"import": "./dist/esm/ort.min.js",
"require": "./dist/cjs/ort.min.js",
"types": "./types.d.ts",
"default": {
"development": "./dist/ort.js",
"types": "./types.d.ts",
"default": "./dist/ort.min.js"
}
}
},
"./experimental": {
"import": "./dist/esm/ort.all.min.js",
"require": "./dist/cjs/ort.all.min.js",
"types": "./types.d.ts",
"default": {
"development": "./dist/ort.all.js",
"types": "./types.d.ts",
"default": "./dist/ort.all.min.js"
}
},
"./wasm": {
"import": "./dist/esm/ort.wasm.min.js",
"require": "./dist/cjs/ort.wasm.min.js",
"types": "./types.d.ts",
"default": "./dist/ort.wasm.min.js"
},
"./wasm-core": {
"import": "./dist/esm/ort.wasm-core.min.js",
"require": "./dist/cjs/ort.wasm-core.min.js",
"types": "./types.d.ts",
"default": "./dist/ort.wasm-core.min.js"
},
"./webgl": {
"import": "./dist/esm/ort.webgl.min.js",
"require": "./dist/cjs/ort.webgl.min.js",
"types": "./types.d.ts",
"default": "./dist/ort.webgl.min.js"
},
"./webgpu": {
"import": "./dist/esm/ort.webgpu.min.js",
"require": "./dist/cjs/ort.webgpu.min.js",
"types": "./types.d.ts",
"default": "./dist/ort.webgpu.min.js"
},
"./training": {
"import": "./dist/esm/ort.training.wasm.min.js",
"require": "./dist/cjs/ort.training.wasm.min.js",
"types": "./types.d.ts",
"default": "./dist/ort.training.wasm.min.js"
}
},
Expand Down
@@ -258,7 +258,7 @@ Status BeamSearchGpt<T>::Execute(const FeedsFetchesManager* init_run_feeds_fetch
     cpu_state.sequences.InitDevice(beam_state.sequences_device);
     ORT_RETURN_IF_ERROR(this->device_copy_int32_func_(beam_state.sequences_device.subspan(0, beam_state.sequences_device.size() / 2),
                                                       cpu_state.sequences_space.subspan(0, cpu_state.sequences_space.size() / 2),
-                                                      nullptr,
+                                                      this->ort_stream_,
                                                       DeviceCopyDirection::hostToDevice));
   }
 
@@ -214,7 +214,7 @@ Status BeamSearchT5<T>::Execute(const FeedsFetchesManager& encoder_feeds_fetches
     cpu_state.sequences.InitDevice(beam_state.sequences_device);
     ORT_RETURN_IF_ERROR(this->device_copy_int32_func_(beam_state.sequences_device.subspan(0, beam_state.sequences_device.size() / 2),
                                                       cpu_state.sequences_space.subspan(0, cpu_state.sequences_space.size() / 2),
-                                                      nullptr,
+                                                      this->ort_stream_,
                                                       DeviceCopyDirection::hostToDevice));
   }
 
@@ -226,7 +226,7 @@ Status BeamSearchWhisper<T>::Execute(const FeedsFetchesManager& encoder_feeds_fe
     cpu_state.sequences.InitDevice(beam_state.sequences_device);
     ORT_RETURN_IF_ERROR(this->device_copy_int32_func_(beam_state.sequences_device.subspan(0, beam_state.sequences_device.size() / 2),
                                                       cpu_state.sequences_space.subspan(0, cpu_state.sequences_space.size() / 2),
-                                                      nullptr,
+                                                      this->ort_stream_,
                                                       DeviceCopyDirection::hostToDevice));
   }
 
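All three beam-search hunks above are the same fix: the host-to-device copy of the sequence buffer is now enqueued on the session's ort_stream_ rather than a null stream, so it is ordered with the rest of the work on that stream. A minimal sketch of why the stream argument matters, assuming a CUDA backend; the helper name and signature below are illustrative, not ORT's actual device-copy function:

    #include <cuda_runtime.h>
    #include <cstddef>
    #include <cstdint>

    // Copy int32 data host -> device, ordered on the caller's stream.
    // Passing a null stream puts the copy on the default stream instead;
    // streams created with cudaStreamNonBlocking are not synchronized with
    // the default stream, so such a copy can race with enqueued kernels.
    cudaError_t CopySequencesToDevice(int32_t* dst, const int32_t* src,
                                      size_t count, cudaStream_t stream) {
      return cudaMemcpyAsync(dst, src, count * sizeof(int32_t),
                             cudaMemcpyHostToDevice, stream);
    }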
onnxruntime/core/framework/execution_providers.h (48 additions, 7 deletions)

@@ -3,7 +3,6 @@
 
 #pragma once
 
-// #include <map>
 #include <memory>
 #include <string>
 #include <unordered_map>
@@ -14,7 +13,9 @@
 #include "core/common/logging/logging.h"
 #ifdef _WIN32
 #include <winmeta.h>
+#include <evntrace.h>
 #include "core/platform/tracing.h"
+#include "core/platform/windows/telemetry.h"
 #endif
 
 namespace onnxruntime {
@@ -44,6 +45,49 @@
     exec_provider_options_[provider_id] = providerOptions;
 
 #ifdef _WIN32
+    LogProviderOptions(provider_id, providerOptions, false);
+
+    // Register callback for ETW capture state (rundown)
+    WindowsTelemetry::RegisterInternalCallback(
+        [this](
+            LPCGUID SourceId,
+            ULONG IsEnabled,
+            UCHAR Level,
+            ULONGLONG MatchAnyKeyword,
+            ULONGLONG MatchAllKeyword,
+            PEVENT_FILTER_DESCRIPTOR FilterData,
+            PVOID CallbackContext) {
+          (void)SourceId;
+          (void)Level;
+          (void)MatchAnyKeyword;
+          (void)MatchAllKeyword;
+          (void)FilterData;
+          (void)CallbackContext;
+
+          // Check if this callback is for capturing state
+          if ((IsEnabled == EVENT_CONTROL_CODE_CAPTURE_STATE) &&
+              ((MatchAnyKeyword & static_cast<ULONGLONG>(onnxruntime::logging::ORTTraceLoggingKeyword::Session)) != 0)) {
+            for (size_t i = 0; i < exec_providers_.size(); ++i) {
+              const auto& provider_id = exec_provider_ids_[i];
+
+              auto it = exec_provider_options_.find(provider_id);
+              if (it != exec_provider_options_.end()) {
+                const auto& options = it->second;
+
+                LogProviderOptions(provider_id, options, true);
+              }
+            }
+          }
+        });
+#endif
+
+    exec_provider_ids_.push_back(provider_id);
+    exec_providers_.push_back(p_exec_provider);
+    return Status::OK();
+  }
+
+#ifdef _WIN32
+  void LogProviderOptions(const std::string& provider_id, const ProviderOptions& providerOptions, bool captureState) {
     for (const auto& config_pair : providerOptions) {
       TraceLoggingWrite(
           telemetry_provider_handle,
@@ -52,14 +96,11 @@
           TraceLoggingLevel(WINEVENT_LEVEL_INFO),
           TraceLoggingString(provider_id.c_str(), "ProviderId"),
           TraceLoggingString(config_pair.first.c_str(), "Key"),
-          TraceLoggingString(config_pair.second.c_str(), "Value"));
+          TraceLoggingString(config_pair.second.c_str(), "Value"),
+          TraceLoggingBool(captureState, "isCaptureState"));
     }
-#endif
-
-    exec_provider_ids_.push_back(provider_id);
-    exec_providers_.push_back(p_exec_provider);
-    return Status::OK();
   }
+#endif
 
   const IExecutionProvider* Get(const onnxruntime::Node& node) const {
     return Get(node.GetExecutionProviderType());
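The capture-state branch above answers a rundown request from an ETW controller: when a controller asks the provider to capture state, the callback re-logs every registered provider's options with isCaptureState set to true. A hedged sketch of the controller side, assuming an already-started trace session; the session handle, provider GUID, and keyword values are placeholders, not ORT's actual ones:

    #include <windows.h>
    #include <evntrace.h>

    // Ask an ETW provider to re-emit (rundown) its current state.
    // The provider receives this as an enable callback whose control code
    // is EVENT_CONTROL_CODE_CAPTURE_STATE, which is exactly what the
    // lambda registered above checks for.
    ULONG RequestCaptureState(TRACEHANDLE session, const GUID& providerGuid,
                              ULONGLONG matchAnyKeyword) {
      return EnableTraceEx2(session, &providerGuid,
                            EVENT_CONTROL_CODE_CAPTURE_STATE,
                            TRACE_LEVEL_INFORMATION,
                            matchAnyKeyword, 0 /*MatchAllKeyword*/,
                            0 /*Timeout*/, nullptr /*EnableParameters*/);
    }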
onnxruntime/core/platform/windows/telemetry.cc (19 additions, 5 deletions)

@@ -2,6 +2,7 @@
 // Licensed under the MIT License.
 
 #include "core/platform/windows/telemetry.h"
+#include "core/platform/ort_mutex.h"
 #include "core/common/logging/logging.h"
 #include "onnxruntime_config.h"
 
@@ -63,6 +64,8 @@ bool WindowsTelemetry::enabled_ = true;
 uint32_t WindowsTelemetry::projection_ = 0;
 UCHAR WindowsTelemetry::level_ = 0;
 UINT64 WindowsTelemetry::keyword_ = 0;
+std::vector<WindowsTelemetry::EtwInternalCallback> WindowsTelemetry::callbacks_;
+OrtMutex WindowsTelemetry::callbacks_mutex_;
 
 WindowsTelemetry::WindowsTelemetry() {
   std::lock_guard<OrtMutex> lock(mutex_);
@@ -104,6 +107,11 @@ UINT64 WindowsTelemetry::Keyword() const {
 //   return etw_status_;
 // }
 
+void WindowsTelemetry::RegisterInternalCallback(const EtwInternalCallback& callback) {
+  std::lock_guard<OrtMutex> lock(callbacks_mutex_);
+  callbacks_.push_back(callback);
+}
+
 void NTAPI WindowsTelemetry::ORT_TL_EtwEnableCallback(
     _In_ LPCGUID SourceId,
     _In_ ULONG IsEnabled,
@@ -112,15 +120,21 @@ void NTAPI WindowsTelemetry::ORT_TL_EtwEnableCallback(
     _In_ ULONGLONG MatchAllKeyword,
     _In_opt_ PEVENT_FILTER_DESCRIPTOR FilterData,
     _In_opt_ PVOID CallbackContext) {
-  (void)SourceId;
-  (void)MatchAllKeyword;
-  (void)FilterData;
-  (void)CallbackContext;
-
   std::lock_guard<OrtMutex> lock(provider_change_mutex_);
   enabled_ = (IsEnabled != 0);
   level_ = Level;
   keyword_ = MatchAnyKeyword;
+
+  InvokeCallbacks(SourceId, IsEnabled, Level, MatchAnyKeyword, MatchAllKeyword, FilterData, CallbackContext);
 }
+
+void WindowsTelemetry::InvokeCallbacks(LPCGUID SourceId, ULONG IsEnabled, UCHAR Level, ULONGLONG MatchAnyKeyword,
+                                       ULONGLONG MatchAllKeyword, PEVENT_FILTER_DESCRIPTOR FilterData,
+                                       PVOID CallbackContext) {
+  std::lock_guard<OrtMutex> lock(callbacks_mutex_);
+  for (const auto& callback : callbacks_) {
+    callback(SourceId, IsEnabled, Level, MatchAnyKeyword, MatchAllKeyword, FilterData, CallbackContext);
+  }
+}
 
 void WindowsTelemetry::EnableTelemetryEvents() const {
onnxruntime/core/platform/windows/telemetry.h (14 additions, 1 deletion)

@@ -2,12 +2,14 @@
 // Licensed under the MIT License.
 
 #pragma once
+#include <atomic>
+#include <vector>
 
 #include "core/platform/telemetry.h"
 #include <Windows.h>
 #include <TraceLoggingProvider.h>
 #include "core/platform/ort_mutex.h"
 #include "core/platform/windows/TraceLoggingConfig.h"
-#include <atomic>
 
 namespace onnxruntime {
 
@@ -58,16 +60,27 @@ class WindowsTelemetry : public Telemetry {
 
   void LogExecutionProviderEvent(LUID* adapterLuid) const override;
 
+  using EtwInternalCallback = std::function<void(LPCGUID SourceId, ULONG IsEnabled, UCHAR Level,
+                                                 ULONGLONG MatchAnyKeyword, ULONGLONG MatchAllKeyword,
+                                                 PEVENT_FILTER_DESCRIPTOR FilterData, PVOID CallbackContext)>;
+
+  static void RegisterInternalCallback(const EtwInternalCallback& callback);
+
 private:
  static OrtMutex mutex_;
  static uint32_t global_register_count_;
  static bool enabled_;
  static uint32_t projection_;
 
+ static std::vector<EtwInternalCallback> callbacks_;
+ static OrtMutex callbacks_mutex_;
 static OrtMutex provider_change_mutex_;
 static UCHAR level_;
 static ULONGLONG keyword_;
 
+ static void InvokeCallbacks(LPCGUID SourceId, ULONG IsEnabled, UCHAR Level, ULONGLONG MatchAnyKeyword,
+                             ULONGLONG MatchAllKeyword, PEVENT_FILTER_DESCRIPTOR FilterData, PVOID CallbackContext);
+
 static void NTAPI ORT_TL_EtwEnableCallback(
     _In_ LPCGUID SourceId,
     _In_ ULONG IsEnabled,
@@ -344,20 +344,25 @@ namespace Dml::GraphDescBuilder
                 dmlFusedNodeInputIndex < isConstGpuGraphInputCount &&
                 isConstGpuGraphInput[dmlFusedNodeInputIndex])
             {
-                // This is a highly inefficient approach to generating constant nodes. It duplicates constant data
-                // across the graph input as well as every consumer's unique constant node. However it is currently
+                // This is a highly inefficient approach to generating constant nodes. It duplicates constant data
+                // across the graph input as well as every consumer's unique constant node. However it is currently
                 // only used for small inputs.
                 uint32_t c_maxConstNodeDataSize = 8;
 
-                ComPtr<OnnxTensorWrapper> constantInput = constantCpuGraphInputGetter(arg->Name());
-
                 auto& operatorGraphInputNode = graphNodeCreateInfo.nodesAsOperatorDesc[operatorGraphInputEdge.ToNodeIndex];
                 std::vector<DmlBufferTensorDesc*> toNodeInputTensorDescs = operatorGraphInputNode->GetInputTensors();
                 DmlBufferTensorDesc* tensorDesc = toNodeInputTensorDescs[operatorGraphInputEdge.ToNodeInputIndex];
+                ComPtr<OnnxTensorWrapper> constantInput;
 
-                if (constantInput && tensorDesc->totalTensorSizeInBytes < c_maxConstNodeDataSize)
+                if (tensorDesc->totalTensorSizeInBytes < c_maxConstNodeDataSize)
                 {
-                    // The tensor description's size should be no larger than the constant input unless it was rounded to
+                    constantInput = constantCpuGraphInputGetter(arg->Name());
+                }
+
+                if (constantInput)
+                {
+                    // The tensor description's size should be no larger than the constant input unless it was rounded to
                     // the required alignment.
                     assert(((constantInput->GetTensorByteSize() + 3) & ~3) >= tensorDesc->totalTensorSizeInBytes);
                     size_t minimumConstantSize = std::min(constantInput->GetTensorByteSize(), gsl::narrow_cast<size_t>(tensorDesc->totalTensorSizeInBytes));
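Two behavioral changes in this hunk: the potentially expensive constantCpuGraphInputGetter call is now made only after the cheap size check passes, and the alignment assert plus size clamp run only when a constant input was actually fetched. The assert relies on the usual round-up-to-a-multiple-of-4 bit trick; a small self-contained illustration (not ORT code):

    // (n + 3) & ~3 rounds n up to the next multiple of 4, so a constant
    // whose byte size was padded for alignment still satisfies the assert.
    static_assert(((5 + 3) & ~3) == 8, "5 rounds up to 8");
    static_assert(((8 + 3) & ~3) == 8, "8 is already a multiple of 4");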
@@ -1123,7 +1123,7 @@ namespace Windows::AI::MachineLearning::Adapter
         }
         ORT_CATCH_RETURN
     }
-
+
     template <class NodeInfoImpl_t, class Base1_t, class Base2_t>
     HRESULT STDMETHODCALLTYPE OpNodeInfoWrapper<NodeInfoImpl_t, Base1_t, Base2_t>::GetConstantInputTensor(uint32_t inputIndex, IMLOperatorTensor** tensor) const noexcept
     {
@@ -1168,7 +1168,7 @@
             m_requiredConstantCpuInputs.begin(),
             m_requiredConstantCpuInputs.end(),
             inputIndex) != m_requiredConstantCpuInputs.end();
-
+
         // This shouldn't happen since kernel creation is deferred and repeated when required constant inputs are not present.
         ORT_THROW_HR_IF(E_UNEXPECTED, inputRequiredAsConstant);
     }
@@ -1562,7 +1562,13 @@
     OnnxTensorWrapper::OnnxTensorWrapper(onnx::TensorProto* impl, const onnxruntime::Path& modelPath) : m_impl(impl)
     {
         // The tensor may be stored as raw data or in typed fields.
-        if (impl->has_raw_data())
+        if (impl->data_location() == onnx::TensorProto_DataLocation_EXTERNAL)
+        {
+            THROW_IF_NOT_OK(onnxruntime::utils::UnpackInitializerData(*impl, modelPath, m_unpackedExternalTensor));
+            m_dataPtr = reinterpret_cast<std::byte*>(m_unpackedExternalTensor.data());
+            m_tensorByteSize = m_unpackedExternalTensor.size();
+        }
+        else if (impl->has_raw_data())
         {
             m_dataPtr = reinterpret_cast<std::byte*>(impl->mutable_raw_data()->data());
             m_tensorByteSize = impl->raw_data().size();
@@ -309,6 +309,7 @@ class OnnxTensorWrapper : public WRL::Base<IMLOperatorTensor>, public Closable
 private:
     size_t m_tensorByteSize = 0;
     std::unique_ptr<std::byte[]> m_unpackedTensor;
+    std::vector<uint8_t> m_unpackedExternalTensor;
     std::byte* m_dataPtr = nullptr;
 
     // Lifetime is managed by the caller and guaranteed to outlive this class