Skip to content

Commit

Permalink
Merge branch 'main' of https://github.com/microsoft/onnxruntime into …
Browse files Browse the repository at this point in the history
…baijumeswani/fix-pipelines
  • Loading branch information
baijumeswani committed Mar 25, 2024
2 parents f3bd524 + 7d976cf commit 4d15d36
Show file tree
Hide file tree
Showing 37 changed files with 3,938 additions and 599 deletions.
98 changes: 86 additions & 12 deletions cmake/patches/abseil/absl_windows.patch
Original file line number Diff line number Diff line change
Expand Up @@ -25,17 +25,91 @@ index a6efc98e..8c4de8e7 100644
"/wd4800",
]
diff --git a/absl/copts/copts.py b/absl/copts/copts.py
index 0d6c1ec3..75fd935f 100644
index e6e11949..0aa7d868 100644
--- a/absl/copts/copts.py
+++ b/absl/copts/copts.py
@@ -132,10 +132,6 @@ COPT_VARS = {
"/wd4068", # unknown pragma
# qualifier applied to function type has no meaning; ignored
"/wd4180",
- # conversion from 'type1' to 'type2', possible loss of data
- "/wd4244",
- # conversion from 'size_t' to 'type', possible loss of data
- "/wd4267",
# The decorated name was longer than the compiler limit
"/wd4503",
# forcing value to bool 'true' or 'false' (performance warning)
@@ -115,10 +115,6 @@ MSVC_WARNING_FLAGS = [
"/wd4068", # unknown pragma
# qualifier applied to function type has no meaning; ignored
"/wd4180",
- # conversion from 'type1' to 'type2', possible loss of data
- "/wd4244",
- # conversion from 'size_t' to 'type', possible loss of data
- "/wd4267",
# The decorated name was longer than the compiler limit
"/wd4503",
# forcing value to bool 'true' or 'false' (performance warning)
diff --git a/absl/debugging/symbolize_win32.inc b/absl/debugging/symbolize_win32.inc
index 53a099a1..34d210d6 100644
--- a/absl/debugging/symbolize_win32.inc
+++ b/absl/debugging/symbolize_win32.inc
@@ -35,15 +35,15 @@ ABSL_NAMESPACE_BEGIN

static HANDLE process = NULL;

-void InitializeSymbolizer(const char*) {
- if (process != nullptr) {
- return;
- }
+namespace {
+void InitializeSymbolizerImpl() {
+
process = GetCurrentProcess();

// Symbols are not loaded until a reference is made requiring the
// symbols be loaded. This is the fastest, most efficient way to use
// the symbol handler.
+
SymSetOptions(SYMOPT_DEFERRED_LOADS | SYMOPT_UNDNAME);
if (!SymInitialize(process, nullptr, true)) {
// GetLastError() returns a Win32 DWORD, but we assign to
@@ -54,6 +54,36 @@ void InitializeSymbolizer(const char*) {
}
}

+bool LookupAndInitialize(const void* pc, SYMBOL_INFO* symbol) {
+ auto hProcess = (process != NULL) ? process : GetCurrentProcess();
+ if (SymFromAddr(hProcess, reinterpret_cast<DWORD64>(pc), nullptr, symbol) != TRUE) {
+ if (GetLastError() == ERROR_INVALID_HANDLE && process == NULL) {
+ InitializeSymbolizerImpl();
+ if (SymFromAddr(process, reinterpret_cast<DWORD64>(pc), nullptr, symbol) != TRUE) {
+ return false;
+ }
+ } else {
+ return false;
+ }
+ return false;
+ }
+ return true;
+}
+}
+
+void InitializeSymbolizer(const char*) {
+ if (process != nullptr) {
+ return;
+ }
+
+ alignas(SYMBOL_INFO) char buf[sizeof(SYMBOL_INFO) + MAX_SYM_NAME];
+ SYMBOL_INFO* symbol = reinterpret_cast<SYMBOL_INFO*>(buf);
+ symbol->SizeOfStruct = sizeof(SYMBOL_INFO);
+ symbol->MaxNameLen = MAX_SYM_NAME;
+
+ static_cast<void>(LookupAndInitialize(reinterpret_cast<const void*>(&InitializeSymbolizer), symbol));
+}
+
bool Symbolize(const void* pc, char* out, int out_size) {
if (out_size <= 0) {
return false;
@@ -62,9 +92,11 @@ bool Symbolize(const void* pc, char* out, int out_size) {
SYMBOL_INFO* symbol = reinterpret_cast<SYMBOL_INFO*>(buf);
symbol->SizeOfStruct = sizeof(SYMBOL_INFO);
symbol->MaxNameLen = MAX_SYM_NAME;
- if (!SymFromAddr(process, reinterpret_cast<DWORD64>(pc), nullptr, symbol)) {
+
+ if(!LookupAndInitialize(pc, symbol)) {
return false;
}
+
const size_t out_size_t = static_cast<size_t>(out_size);
strncpy(out, symbol->Name, out_size_t);
if (out[out_size_t - 1] != '\0') {
Original file line number Diff line number Diff line change
Expand Up @@ -164,17 +164,14 @@ export const createConv2DTransposeMatMulProgramInfo =
const outWidth = isChannelsLast ? outputShape[2] : outputShape[3];
const outHeight = isChannelsLast ? outputShape[1] : outputShape[2];
const outChannels = isChannelsLast ? outputShape[3] : outputShape[1];
const isVec4 =
isChannelsLast ? inChannels % 4 === 0 && outChannels % 4 === 0 : outWidth % 4 === 0 && outChannels % 4 === 0;
// TODO: enable vec4 for NCHW
const isVec4 = isChannelsLast && (inChannels % 4 === 0 && inChannels % 3) && outChannels % 4 === 0;

// TODO: fine tune size
const dispatchX = isChannelsLast ? outChannels : outWidth * outHeight;
const dispatchY = isChannelsLast ? outWidth * outHeight : outChannels;
const workGroupSize: [number, number, number] = isVec4 ?
[8, 8, 1] :
[(dispatchX <= 4 || dispatchY <= 4) ? 4 : 16, dispatchX > 4 && dispatchY <= 4 ? 4 : 16, 1];
const elementsPerThread =
isVec4 ? [4, 4, 1] : [dispatchX <= 4 ? 1 : 4, dispatchX > 4 && dispatchY <= 4 ? 1 : 4, 1];
const workGroupSize: [number, number, number] = [8, 8, 1];
const elementsPerThread = dimAOuter <= 8 ? [4, 1, 1] : [4, 4, 1];
const dispatch = [
Math.ceil(dispatchX / workGroupSize[0] / elementsPerThread[0]),
Math.ceil(dispatchY / workGroupSize[1] / elementsPerThread[1]),
Expand Down
Loading

0 comments on commit 4d15d36

Please sign in to comment.