Skip to content

Commit

Permalink
Finished serialization.
Browse files Browse the repository at this point in the history
  Certain kernels do not share their pre-packed weights, they simply
  keep them to themselves. TODO: Make them share.
  • Loading branch information
yuslepukhin committed Dec 5, 2024
1 parent df8f630 commit 0584f09
Show file tree
Hide file tree
Showing 9 changed files with 176 additions and 173 deletions.
14 changes: 8 additions & 6 deletions include/onnxruntime/core/graph/graph.h
Original file line number Diff line number Diff line change
Expand Up @@ -1489,12 +1489,14 @@ class Graph { // NOLINT(clang-analyzer-optin.performance.Padding): preserve exi
Status AddConstantProtoAsInitializer(const ONNX_NAMESPACE::NodeProto& constant_node_proto,
std::optional<std::string_view> new_name);

ONNX_NAMESPACE::GraphProto ToGraphProtoWithExternalInitiallizersImpl(const std::filesystem::path& model_path,
const std::filesystem::path& external_file_path,
const ModelSavingOptions& model_saving_options,
ONNX_NAMESPACE::GraphProto& graph_proto,
std::ostream& external_stream,
int64_t& external_offset) const;
Status ToGraphProtoWithExternalInitiallizersImpl(
const std::filesystem::path& model_path,
const std::filesystem::path& external_file_path,
const std::filesystem::path& modified_external_file_path,
const ModelSavingOptions& model_saving_options,
ONNX_NAMESPACE::GraphProto& graph_proto,
std::ostream& external_stream,
int64_t& external_offset) const;

#endif

Expand Down
2 changes: 1 addition & 1 deletion include/onnxruntime/core/graph/model_saving_options.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,4 +41,4 @@ struct ModelSavingOptions {
const PrepackedForSerialization* prepacked_for_save = nullptr;
};

}
} // namespace onnxruntime
34 changes: 24 additions & 10 deletions onnxruntime/core/framework/prepacked_weights_container.cc
Original file line number Diff line number Diff line change
Expand Up @@ -56,18 +56,21 @@ PrepackedForSerialization::PrepackedForSerialization()

PrepackedForSerialization::~PrepackedForSerialization() = default;

void PrepackedForSerialization::Subgraph::Insert(std::string key, PrePackedWeights&& packed_weight) {
void PrepackedForSerialization::Subgraph::InsertFromDisk(std::string key, PrePackedWeights&& packed_weight) {
auto result = key_to_blobs_.emplace(std::move(key), std::move(packed_weight));
ORT_ENFORCE(result.second, "Duplicate pre-packed weight from disk");
}

bool PrepackedForSerialization::Subgraph::CreateOrOverWrite(const std::string& weight_name, std::string key,
PrePackedWeights&& packed_weight) {
// We overwrite the existing key. This is necessary in case we already have a pre-packed weight
// mapped from disk, but we want to overwrite it with our most recent pre-packed version.
auto result = key_to_blobs_.insert_or_assign(std::move(key), std::move(packed_weight));
weight_to_pre_packs_[weight_name].push_back(result.first);
return result.second;
bool PrepackedForSerialization::Subgraph::WritePackedForSaving(const std::string& weight_name, const std::string& key,
PrePackedWeights&& packed_weight) {
auto hit = key_to_blobs_.find(key);
if (hit == key_to_blobs_.end()) {
auto result = key_to_blobs_.insert({key, std::move(packed_weight)});
sorted_by_weight_for_writing_[weight_name].push_back(result.first);
return true;
}
hit->second = std::move(packed_weight);
return false;
}

const PrePackedWeights* PrepackedForSerialization::Subgraph::GetPrepackedWeights(const std::string& key) const {
Expand Down Expand Up @@ -96,12 +99,23 @@ std::optional<PrePackedWeights> PrepackedForSerialization::TakePrepackedWeights(
return result;
}

PrepackedForSerialization::Subgraph& PrepackedForSerialization::FindOrCreateSubgraph(const Graph& graph) {
PrepackedForSerialization::Subgraph& PrepackedForSerialization::FindOrCreatePrepackedGraph(const Graph& graph) {
if (graph.ParentGraph() == nullptr) {
return main_graph_;
}
auto& parent = FindOrCreateSubgraph(*graph.ParentGraph());
auto& parent = FindOrCreatePrepackedGraph(*graph.ParentGraph());
return parent.GetOrCreateSubgraph(graph);
}

const PrepackedForSerialization::Subgraph* PrepackedForSerialization::FindPrepackedGraph(const Graph& graph) const {
if (graph.ParentGraph() == nullptr) {
return &main_graph_;
}
auto* parent = FindPrepackedGraph(*graph.ParentGraph());
if (parent != nullptr) {
parent = parent->GetSubgraph(graph);
}
return parent;
}

} // namespace onnxruntime
26 changes: 18 additions & 8 deletions onnxruntime/core/framework/prepacked_weights_container.h
Original file line number Diff line number Diff line change
Expand Up @@ -91,8 +91,8 @@ class PrepackedForSerialization final {
ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(PrepackedForSerialization);

using KeyToBlobMap = std::unordered_map<std::string, PrePackedWeights>;
using KeyToBlobMapIterator = KeyToBlobMap::iterator;
using BlobsInderect = std::vector<KeyToBlobMapIterator>;
using KeyToBlobMapConstIterator = KeyToBlobMap::const_iterator;
using BlobsInderect = std::vector<KeyToBlobMapConstIterator>;
using BlobsConstIterator = BlobsInderect::const_iterator;

// Maps weight name to iterators in key_to_blobs_. It associates a weight name with its pre-packs.
Expand Down Expand Up @@ -130,11 +130,10 @@ class PrepackedForSerialization final {
return it == subgraph_prepacks_.end() ? nullptr : it->second.get();
}

// This does not populate per-initializer structures.
void Insert(std::string key, PrePackedWeights&& packed_weight);
void InsertFromDisk(std::string key, PrePackedWeights&& packed_weight);

bool CreateOrOverWrite(const std::string& weight_name, std::string key,
PrePackedWeights&& packed_weight);
bool WritePackedForSaving(const std::string& weight_name, const std::string& key,
PrePackedWeights&& packed_weight);

const PrePackedWeights* GetPrepackedWeights(const std::string& key) const;

Expand All @@ -148,11 +147,20 @@ class PrepackedForSerialization final {
save_mode_on_ = value;
}

// Returns iterators to key->blob pair for writing
const BlobsInderect* GetBlobsForWeight(const std::string& weight_name) const {
auto hit = sorted_by_weight_for_writing_.find(weight_name);
if (hit != sorted_by_weight_for_writing_.end()) {
return &hit->second;
}
return nullptr;
}

private:
bool save_mode_on_;
Subgraph* parent_ = nullptr;
KeyToBlobMap& key_to_blobs_;
WeightToPrePacksMap weight_to_pre_packs_;
WeightToPrePacksMap sorted_by_weight_for_writing_;
// Map Graph ptr to subgraphs
std::unordered_map<const Graph*, std::unique_ptr<Subgraph>> subgraph_prepacks_;
};
Expand All @@ -179,7 +187,9 @@ class PrepackedForSerialization final {

std::optional<PrePackedWeights> TakePrepackedWeights(const std::string& key);

Subgraph& FindOrCreateSubgraph(const Graph& graph);
Subgraph& FindOrCreatePrepackedGraph(const Graph& graph);

const Subgraph* FindPrepackedGraph(const Graph& graph) const;

private:
// Map of key to pre-packed blobs.This is common for all subgraphs
Expand Down
27 changes: 13 additions & 14 deletions onnxruntime/core/framework/session_state.cc
Original file line number Diff line number Diff line change
Expand Up @@ -387,19 +387,18 @@ static Status KernelUseSharedPrePackedBuffers(OpKernel& kernel, int input_idx,
return Status::OK();
}

// Here we use the data that is owned by somebody else
static void SavePrepackedDataForWriting(const std::string& weight_name,
const std::string& key,
const PrePackedWeights& prepacked_weights,
PrepackedForSerialization::Subgraph& prepacked_subgraph) {
static void WritePrepackedForSaving(const std::string& weight_name,
const std::string& key,
const PrePackedWeights& prepacked_weights,
PrepackedForSerialization::Subgraph& prepacked_subgraph) {
PrePackedWeights weights_for_saving;
for (const auto& prepacked_buffer : prepacked_weights.buffers_) {
// BufferDeleter is nullptr because we do not own the data
// BufferDeleter is nullptr because we do not own the data in this case
weights_for_saving.buffers_.emplace_back(prepacked_buffer.get(), BufferDeleter(nullptr));
}

weights_for_saving.buffer_sizes_ = prepacked_weights.buffer_sizes_;
prepacked_subgraph.CreateOrOverWrite(weight_name, key, std::move(weights_for_saving));
prepacked_subgraph.WritePackedForSaving(weight_name, key, std::move(weights_for_saving));
}

static std::string GenerateKeyForPrepackedWeightsMap(const std::string& op_type,
Expand All @@ -417,7 +416,7 @@ Status SessionState::PrepackConstantInitializedTensors(
const std::unordered_map<std::string, const OrtValue*>& initializers_to_share_map) {
auto prepacked_constant_weights = [this, &constant_initializers_use_count, &initializers_to_share_map](
bool should_cache_prepacked_weights_for_shared_initializers) -> Status {
auto& prepacked_subgraph = prepacked_weights_for_serialization_.FindOrCreateSubgraph(graph_);
auto& prepacked_subgraph = prepacked_weights_for_serialization_.FindOrCreatePrepackedGraph(graph_);

for (auto& node : GetGraphViewer().Nodes()) {
auto kernel = GetMutableKernel(node.Index());
Expand Down Expand Up @@ -492,8 +491,8 @@ Status SessionState::PrepackConstantInitializedTensors(
if (prepacked_weights_for_serialization_.IsSaveModeOn()) {
// Here we take references to the shared container owned data, so we unmap any entries
// that we are mapping from disk
SavePrepackedDataForWriting(input_name, prepacked_weights_container_key, prepacked_shared,
prepacked_subgraph);
WritePrepackedForSaving(input_name, prepacked_weights_container_key, prepacked_shared,
prepacked_subgraph);
}

} else { // container doesn't contain the pre-packed weight - so write into it for sharing across kernel instances
Expand Down Expand Up @@ -523,8 +522,8 @@ Status SessionState::PrepackConstantInitializedTensors(
if (prepacked_weights_for_serialization_.IsSaveModeOn()) {
// Here we take references to the shared container owned data, so we unmap any entries
// that we are mapping from disk, so we write the most fresh data possible
SavePrepackedDataForWriting(input_name, prepacked_weights_container_key, shared_prepacked,
prepacked_subgraph);
WritePrepackedForSaving(input_name, prepacked_weights_container_key, shared_prepacked,
prepacked_subgraph);
}
}
}
Expand Down Expand Up @@ -554,8 +553,8 @@ Status SessionState::PrepackConstantInitializedTensors(

if (prepacked_subgraph.IsSaveModeOn() || weights_to_use == nullptr) {
// In this case pre-packed container owns the data
prepacked_subgraph.CreateOrOverWrite(input_name, prepacked_weights_container_key,
std::move(weights_to_be_filled_in));
prepacked_subgraph.WritePackedForSaving(input_name, prepacked_weights_container_key,
std::move(weights_to_be_filled_in));
weights_to_use = prepacked_subgraph.GetPrepackedWeights(prepacked_weights_container_key);
assert(weights_to_use != nullptr);
}
Expand Down
53 changes: 38 additions & 15 deletions onnxruntime/core/framework/tensor_external_data_info.cc
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
#include "tensor_external_data_info.h"
#include "core/common/common.h"
#include "core/common/narrow.h"
#include "core/common/safeint.h"
#include "core/common/string_utils.h"
#include "core/platform/path_lib.h"

Expand Down Expand Up @@ -54,8 +55,9 @@ Status ExternalDataInfo::Create(const RepeatedPtrField<StringStringEntryProto>&
} else if (stringmap.key() == "checksum" && !stringmap.value().empty()) {
out->checksum_ = stringmap.value();
} else if (stringmap.key().find("prepacked", 0) == 0) {
// Starts with 'prepacked'. Each prepacked entry may have multiple blobs with the same key
// we output them with the same key
// Starts with 'prepacked', each has its own key.
// Each prepacked entry may have multiple blobs with the same key
// we output them with the same key
// format = key|offset;length;checksum[|offset;length;checksum]
// We are ignoring invalid entries (should not be any), and rely
// on in memory pre-packs regenerated in this case.
Expand Down Expand Up @@ -114,17 +116,38 @@ void ExternalDataInfo::SetExternalLocationToProto(const std::filesystem::path& e
length->set_value(std::to_string(tensor_bytes_size));
}

// void ExternalDataInfo::AddPrepackedEntriesToProto(
// const PrepackedForSerialization::BlobsInderect& prepacked_for_write, ::ONNX_NAMESPACE::TensorProto& proto) {
// size_t prepack_count = 0;
// std::stringstream os;
// for (auto iter : prepacked_for_write) {
// const auto& [key, prepacked_weights] = *iter;
// os << key << '|';
// const size_t blob_num = prepacked_weights.buffers_.size();
// for (size_t i = 0; blob_num; ++i) {
// //XXX: Need offset calculation
// // os << ed_weights.blobs_[i].offset << ';';
// }
// }
std::ostream& ExternalDataInfo::AddPrepackedEntriesToProto(
const PrepackedForSerialization::BlobsInderect& prepacked_for_write, bool align, int64_t allocation_granularity,
std::ostream& os, int64_t& external_offset, ::ONNX_NAMESPACE::TensorProto& proto) {
for (const auto& iter : prepacked_for_write) {
size_t prepack_count = 0;
const auto& [key, prepacked_weights] = *iter;
std::stringstream prepacked_entry;
prepacked_entry << key << "|";
for (size_t i = 0, size = prepacked_weights.buffers_.size(); i < size; ++i) {
if (align) {
// return early on error
if (!AlignAndPad(os, allocation_granularity, external_offset)) {
return os;
}
}
const auto size_in_bytes = prepacked_weights.buffer_sizes_[i];
if (prepack_count++ > 0) {
prepacked_entry << "|";
}
// Checksum is currently not validated
prepacked_entry << external_offset << ";" << size_in_bytes << ";0";
if (!os.write(reinterpret_cast<const char*>(prepacked_weights.buffers_[i].get()), size_in_bytes)) {
return os;
}
external_offset = SafeInt<int64_t>(external_offset) + size_in_bytes;
}
auto* prepacked = proto.add_external_data();
std::string prepacked_key("prepacked_");
prepacked_key.append(std::to_string(prepack_count));
prepacked->set_key(std::move(prepacked_key));
prepacked->set_value(prepacked_entry.str());
}
return os;
}
} // namespace onnxruntime
30 changes: 28 additions & 2 deletions onnxruntime/core/framework/tensor_external_data_info.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
#pragma once

#include <filesystem>
#include <ostream>
#include <string>
#include <tuple>

Expand Down Expand Up @@ -39,8 +40,33 @@ class ExternalDataInfo {
size_t tensor_bytes_size,
::ONNX_NAMESPACE::TensorProto& proto);

static void AddPrepackedEntriesToProto(const PrepackedForSerialization::BlobsInderect& prepacked_for_write,
::ONNX_NAMESPACE::TensorProto& proto);
// Pads the output with zeros according to the specified allocation_granularity
// It updates external_offset for alignment.
// need to do padding before write actual tensor data as we do offset alignment at the begin of
// large tensors (offset need to be page aligned and allocation granularity aligned) like below:
// \242\2557\256\023.\031&0000000000000000\332)k+\253\246\342\246(&\006!\347\232\374\236\325\026\032+\36XXXX
// |<---smaller tensor---->|<---padding--->|<------------------large tensor----------------------------->|
static std::ostream& AlignAndPad(std::ostream& stream, int64_t allocation_granularity, int64_t& external_offset) {
// Align to the larger of the page size or the allocation granularity
int64_t alignment_factor = std::max(static_cast<int64_t>(4096), allocation_granularity);
// Align to the next page or alloc granularity boundary
int64_t new_external_offset = static_cast<int64_t>(
std::floor((external_offset + alignment_factor - 1) / alignment_factor)) *
alignment_factor;

// padding tensor with zeros for alignment
for (int64_t index = external_offset; index != new_external_offset; ++index) {
stream << '\0';
}
external_offset = new_external_offset;
return stream;
}

static std::ostream& AddPrepackedEntriesToProto(const PrepackedForSerialization::BlobsInderect& prepacked_for_write,
bool align, int64_t allocation_granularity,
std::ostream& os,
int64_t& external_offset,
::ONNX_NAMESPACE::TensorProto& proto);

using PrepackedInfo = std::tuple<OFFSET_TYPE, size_t, std::string>;
using PrepackedInfos = std::unordered_map<std::string, std::vector<PrepackedInfo>>;
Expand Down
4 changes: 3 additions & 1 deletion onnxruntime/core/framework/tensorprotoutils.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1057,6 +1057,8 @@ Status GetExtDataFromTensorProto(const Env& env, const std::filesystem::path& mo
if (prepacked_info != nullptr && !prepacked_infos->empty()) {
for (const auto& [key, blobs] : *prepacked_infos) {
PrePackedWeights prepacked_weights;
prepacked_weights.buffers_.reserve(blobs.size());
prepacked_weights.buffer_sizes_.reserve(blobs.size());
for (const auto& blob : blobs) {
const auto blob_offset = std::get<0>(blob);
const auto blob_length = std::get<1>(blob);
Expand All @@ -1074,7 +1076,7 @@ Status GetExtDataFromTensorProto(const Env& env, const std::filesystem::path& mo
prepacked_weights.buffer_sizes_.push_back(blob_length);
}
if (!blobs.empty()) {
prepacked_info->Insert(key, std::move(prepacked_weights));
prepacked_info->InsertFromDisk(key, std::move(prepacked_weights));
}
}
}
Expand Down
Loading

0 comments on commit 0584f09

Please sign in to comment.