From 42b700d463222f9f2af0ef29105baccfdbff1313 Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Fri, 19 Apr 2024 10:27:42 -0700 Subject: [PATCH] Eliminate stray vector and the contention it creates (#20377) ### Description Unused vector allocating large memory chunk within a concurrent routine creates heap contention and is eliminated. ### Motivation and Context This partially addresses https://github.com/microsoft/onnxruntime/issues/20373. --- .../core/providers/cpu/nn/tfidfvectorizer.cc | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/onnxruntime/core/providers/cpu/nn/tfidfvectorizer.cc b/onnxruntime/core/providers/cpu/nn/tfidfvectorizer.cc index eb245a4c9ba0c..60acd870eb43c 100644 --- a/onnxruntime/core/providers/cpu/nn/tfidfvectorizer.cc +++ b/onnxruntime/core/providers/cpu/nn/tfidfvectorizer.cc @@ -3,12 +3,13 @@ #include "tfidfvectorizer.h" #include "core/common/common.h" +#include "core/common/inlined_containers.h" +#include #include "core/framework/tensor.h" #include "core/platform/threadpool.h" #include -#include -#include +#include namespace onnxruntime { @@ -41,10 +42,15 @@ using NgramPartInt = NgramPart; using NgramPartString = NgramPart; // Avoid recursive class definitions using unique_ptr + forward declaration -using IntMap = std::unordered_map>; +using IntMap = InlinedHashMap>; +#ifndef DISABLE_ABSEIL +using StrMap = absl::flat_hash_map, std::unique_ptr, + std::hash, std::equal_to>; +#else using StrMap = std::unordered_map, std::unique_ptr, std::hash, std::equal_to>; +#endif template <> struct NgramPart { @@ -412,7 +418,6 @@ Status TfIdfVectorizer::Compute(OpKernelContext* ctx) const { is_input_string, num_batches, num_rows, &fn_weight](ptrdiff_t batch_num) { // Frequency holder allocate [B..output_size_] and init all to zero. auto work = concurrency::ThreadPool::PartitionWork(batch_num, num_batches, static_cast(num_rows)); - std::vector frequencies(this->impl_->output_size_); for (auto row_num = work.start; row_num < work.end; ++row_num) { auto out = gsl::span(output_data + row_num * this->impl_->output_size_, this->impl_->output_size_); std::fill(out.begin(), out.end(), 0.0f);