diff --git a/onnxruntime/contrib_ops/cpu/utils/console_dumper.h b/onnxruntime/contrib_ops/cpu/utils/console_dumper.h index 3c255879df199..2782a59d4326d 100644 --- a/onnxruntime/contrib_ops/cpu/utils/console_dumper.h +++ b/onnxruntime/contrib_ops/cpu/utils/console_dumper.h @@ -37,6 +37,8 @@ class IConsoleDumper { virtual void Print(const char* name, int index, bool end_line) const = 0; virtual void Print(const char* name, const std::string& value, bool end_line) const = 0; + virtual void Print(const std::string& value) const = 0; + protected: bool is_enabled_; }; diff --git a/onnxruntime/contrib_ops/cpu/utils/debug_macros.h b/onnxruntime/contrib_ops/cpu/utils/debug_macros.h index 37a9b0160ade9..d5cbaa0a3e6b7 100644 --- a/onnxruntime/contrib_ops/cpu/utils/debug_macros.h +++ b/onnxruntime/contrib_ops/cpu/utils/debug_macros.h @@ -1,4 +1,5 @@ #pragma once +#include "core/common/make_string.h" // #define DEBUG_GENERATION 1 // uncomment it for debugging generation (like beam search etc) @@ -14,9 +15,11 @@ #if DUMP_CPU_TENSOR_LEVEL > 0 #define DUMP_CPU_TENSOR_INIT() onnxruntime::contrib::CpuTensorConsoleDumper cpu_dumper #define DUMP_CPU_TENSOR(...) cpu_dumper.Print(__VA_ARGS__) +#define DUMP_STRING(...) cpu_dumper.Print(::onnxruntime::MakeString(__VA_ARGS__)) #else #define DUMP_CPU_TENSOR_INIT() #define DUMP_CPU_TENSOR(...) +#define DUMP_STRING(...) #endif #if DUMP_CPU_TENSOR_LEVEL > 1 diff --git a/onnxruntime/contrib_ops/cpu/utils/dump_tensor.cc b/onnxruntime/contrib_ops/cpu/utils/dump_tensor.cc index 3a5deef35d6d6..87a9cd3965763 100644 --- a/onnxruntime/contrib_ops/cpu/utils/dump_tensor.cc +++ b/onnxruntime/contrib_ops/cpu/utils/dump_tensor.cc @@ -1,18 +1,38 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. -#include #include "contrib_ops/cpu/utils/dump_tensor.h" +#include +#include +#include +#include #include "core/framework/print_tensor_utils.h" #include "contrib_ops/cpu/utils/debug_macros.h" +#include "core/platform/env_var_utils.h" namespace onnxruntime { namespace contrib { #if DUMP_CPU_TENSOR_LEVEL > 0 +// Environment variable to enable/disable dumping +constexpr const char* kEnableCpuTensorDumper = "ORT_ENABLE_CPU_DUMP"; + +// Environment variable to enable/disable dumping thread id +constexpr const char* kDumpThreadId = "ORT_DUMP_THREAD_ID"; + +// To avoid dumping at the same time from multiple threads +static std::mutex s_mutex; + +static bool s_output_thread_id = false; + template void DumpCpuTensor(const char* name, const T* tensor, int dim0, int dim1) { + std::unique_lock lock(s_mutex); + + if (s_output_thread_id) + std::cout << "Thread ID:" << std::this_thread::get_id() << std::endl; + if (nullptr != name) { std::cout << std::string(name) << std::endl; } @@ -26,6 +46,11 @@ void DumpCpuTensor(const char* name, const T* tensor, int dim0, int dim1) { template void DumpCpuTensor(const char* name, const T* tensor, int dim0, int dim1, int dim2) { + std::unique_lock lock(s_mutex); + + if (s_output_thread_id) + std::cout << "Thread ID:" << std::this_thread::get_id() << std::endl; + if (nullptr != name) { std::cout << std::string(name) << std::endl; } @@ -93,6 +118,21 @@ void DumpCpuTensor(const char* name, const Tensor& tensor) { DumpCpuTensor(nullptr, tensor, static_cast(num_rows), static_cast(row_size)); } +CpuTensorConsoleDumper::CpuTensorConsoleDumper() { + is_enabled_ = ParseEnvironmentVariableWithDefault(kEnableCpuTensorDumper, 1) != 0; + s_output_thread_id = ParseEnvironmentVariableWithDefault(kDumpThreadId, 0) != 0; +} + +void CpuTensorConsoleDumper::Print(const std::string& value) const { + if (!is_enabled_) + return; + + std::unique_lock lock(s_mutex); + if (s_output_thread_id) + std::cout << "Thread ID:" << std::this_thread::get_id() << std::endl; + std::cout << value << std::endl; +} + void CpuTensorConsoleDumper::Print(const char* name, const float* tensor, int dim0, int dim1) const { if (!is_enabled_) return; @@ -185,6 +225,8 @@ void CpuTensorConsoleDumper::Print(const char* name, const OrtValue& value) cons void CpuTensorConsoleDumper::Print(const char* name, int index, bool end_line) const { if (!is_enabled_) return; + + std::unique_lock lock(s_mutex); std::cout << std::string(name) << "[" << index << "]"; if (end_line) { @@ -196,6 +238,7 @@ void CpuTensorConsoleDumper::Print(const char* name, const std::string& value, b if (!is_enabled_) return; + std::unique_lock lock(s_mutex); std::cout << std::string(name) << "=" << value; if (end_line) { @@ -204,6 +247,12 @@ void CpuTensorConsoleDumper::Print(const char* name, const std::string& value, b } #else +CpuTensorConsoleDumper::CpuTensorConsoleDumper() { +} + +void CpuTensorConsoleDumper::Print(const std::string&) const { +} + void CpuTensorConsoleDumper::Print(const char*, const float*, int, int) const { } @@ -254,7 +303,6 @@ void CpuTensorConsoleDumper::Print(const char*, int, bool) const { void CpuTensorConsoleDumper::Print(const char*, const std::string&, bool) const { } - #endif } // namespace contrib diff --git a/onnxruntime/contrib_ops/cpu/utils/dump_tensor.h b/onnxruntime/contrib_ops/cpu/utils/dump_tensor.h index d902806fd0d18..f102eae6ec709 100644 --- a/onnxruntime/contrib_ops/cpu/utils/dump_tensor.h +++ b/onnxruntime/contrib_ops/cpu/utils/dump_tensor.h @@ -2,6 +2,7 @@ // Licensed under the MIT License. #pragma once +#include #include #include "core/framework/ort_value.h" #include "contrib_ops/cpu/utils/console_dumper.h" @@ -11,7 +12,7 @@ namespace contrib { class CpuTensorConsoleDumper : public IConsoleDumper { public: - CpuTensorConsoleDumper() = default; + CpuTensorConsoleDumper(); virtual ~CpuTensorConsoleDumper() {} void Print(const char* name, const float* tensor, int dim0, int dim1) const override; void Print(const char* name, const MLFloat16* tensor, int dim0, int dim1) const override; @@ -33,6 +34,14 @@ class CpuTensorConsoleDumper : public IConsoleDumper { void Print(const char* name, const OrtValue& value) const override; void Print(const char* name, int index, bool end_line) const override; void Print(const char* name, const std::string& value, bool end_line) const override; + + void Print(const std::string& value) const override; + + // Output a vector with a threshold for max number of elements to output. Default threshold 0 means no limit. + template + void Print(const char* name, const std::vector& vec, size_t max_count = 0) const { + this->Print(name, vec.data(), 1, static_cast(std::min(max_count, vec.size()))); + } }; } // namespace contrib diff --git a/onnxruntime/contrib_ops/cuda/utils/dump_cuda_tensor.cc b/onnxruntime/contrib_ops/cuda/utils/dump_cuda_tensor.cc index fb7af3cfdd54f..e10c2ec63fd51 100644 --- a/onnxruntime/contrib_ops/cuda/utils/dump_cuda_tensor.cc +++ b/onnxruntime/contrib_ops/cuda/utils/dump_cuda_tensor.cc @@ -202,6 +202,10 @@ void DumpGpuTensor(const char* name, const Tensor& tensor) { DumpGpuTensor(nullptr, tensor, static_cast(num_rows), static_cast(row_size)); } +void CudaTensorConsoleDumper::Print(const std::string& value) const { + std::cout << value << std::endl; +} + void CudaTensorConsoleDumper::Print(const char* name, const size_t* tensor, int dim0, int dim1) const { if (is_enabled_) DumpGpuTensor(name, tensor, dim0, dim1, true); @@ -325,6 +329,10 @@ void CudaTensorConsoleDumper::Print(const char* name, const std::string& value, } #else + +void CudaTensorConsoleDumper::Print(const std::string&) const { +} + void CudaTensorConsoleDumper::Print(const char*, const size_t*, int, int) const { } diff --git a/onnxruntime/contrib_ops/cuda/utils/dump_cuda_tensor.h b/onnxruntime/contrib_ops/cuda/utils/dump_cuda_tensor.h index 0f25e85bb97d7..6ad0ad9a67b75 100644 --- a/onnxruntime/contrib_ops/cuda/utils/dump_cuda_tensor.h +++ b/onnxruntime/contrib_ops/cuda/utils/dump_cuda_tensor.h @@ -46,6 +46,8 @@ class CudaTensorConsoleDumper : public onnxruntime::contrib::IConsoleDumper { void Print(const char* name, const OrtValue& value) const override; void Print(const char* name, int index, bool end_line) const override; void Print(const char* name, const std::string& value, bool end_line) const override; + + void Print(const std::string& value) const override; }; } // namespace cuda