Skip to content

Commit

Permalink
enable large initializer offset align for save external data in ORT
Browse files Browse the repository at this point in the history
  • Loading branch information
frank-dong-ms committed Aug 2, 2024
1 parent 1637f22 commit 7069ec9
Show file tree
Hide file tree
Showing 6 changed files with 99 additions and 16 deletions.
14 changes: 13 additions & 1 deletion include/onnxruntime/core/graph/graph.h
Original file line number Diff line number Diff line change
Expand Up @@ -1144,11 +1144,23 @@ class Graph { // NOLINT(clang-analyzer-optin.performance.Padding): preserve exi
@param model_file_path path of the model file.
@param initializer_size_threshold initializers larger or equal to this threshold (in bytes) are saved
in the external file. Initializer smaller than this threshold are included in the onnx file.
@param align_offset when true, the offset of each large initializer is page aligned and allocation
granularity aligned for mmap support.
This is done by padding the previous tensor data with zeros while keeping its length the same.
Tensor data is aligned only if its size is > align_threshold.
@param align_threshold alignment threshold for the size of the data (in bytes).
Having a low threshold will waste file space for small initializers.
Only when a tensor's data is > align_threshold will it be force aligned.
Defaults to 1MB.
@param allocation_granularity the allocation granularity for mmap() support.
Typically 64KB for Windows & 4KB for other OSes. Defaults to 64KB.
@returns GraphProto serialization of the graph.
*/
ONNX_NAMESPACE::GraphProto ToGraphProtoWithExternalInitializers(
    const std::filesystem::path& external_file_path,
    const std::filesystem::path& model_file_path,
    size_t initializer_size_threshold,
    bool align_offset = false,         // standard literal; FALSE is a Windows-only macro
    size_t align_threshold = 1048576,  // 1MB: only tensor data larger than this is aligned
    size_t allocation_granularity = 65536) const;  // 64KB

/** Gets the ISchemaRegistry instances being used with this Graph. */
IOnnxRuntimeOpSchemaCollectionPtr GetSchemaRegistry() const;
Expand Down
21 changes: 20 additions & 1 deletion onnxruntime/core/graph/graph.cc
Original file line number Diff line number Diff line change
Expand Up @@ -4021,7 +4021,10 @@ ONNX_NAMESPACE::GraphProto Graph::ToGraphProto() const {

ONNX_NAMESPACE::GraphProto Graph::ToGraphProtoWithExternalInitializers(const std::filesystem::path& external_file_path,
const std::filesystem::path& model_file_path,
size_t initializer_size_threshold) const {
size_t initializer_size_threshold,
bool align_offset,
size_t align_threshold,
size_t allocation_granularity) const {
GraphProto result;
ToGraphProtoInternal(result);
ORT_ENFORCE(external_file_path.is_relative());
Expand Down Expand Up @@ -4063,6 +4066,22 @@ ONNX_NAMESPACE::GraphProto Graph::ToGraphProtoWithExternalInitializers(const std
external_stream << raw_data[index];
}

// Update external_offset for alignment. Only tensors strictly larger than align_threshold are aligned,
// and only when the caller opted in via align_offset.
if (align_offset && tensor_bytes_size > align_threshold) {
  // Align to the larger of the page size (4096 bytes) or the allocation granularity.
  const size_t alignment_factor = std::max(static_cast<size_t>(4096), allocation_granularity);

  // Round up to the next multiple of alignment_factor. The integer division already truncates,
  // so no floating-point floor is needed; staying in integer arithmetic also avoids the precision
  // loss a round-trip through double would introduce for very large offsets.
  const size_t new_external_offset =
      static_cast<size_t>((external_offset + alignment_factor - 1) / alignment_factor) * alignment_factor;

  // Pad the stream with '0' characters up to the aligned offset.
  // NOTE(review): the tensor's raw bytes appear to be streamed just before this statement, so this
  // padding lands after them; confirm that the offset recorded for this tensor matches where its
  // bytes were actually written.
  for (size_t index = external_offset; index != new_external_offset; ++index) {
    external_stream << '0';
  }

  external_offset = new_external_offset;
}

output_proto->set_data_location(ONNX_NAMESPACE::TensorProto_DataLocation::TensorProto_DataLocation_EXTERNAL);
ONNX_NAMESPACE::StringStringEntryProto* location = output_proto->add_external_data();
location->set_key("location");
Expand Down
36 changes: 28 additions & 8 deletions onnxruntime/core/graph/model.cc
Original file line number Diff line number Diff line change
Expand Up @@ -383,12 +383,18 @@ ModelProto Model::ToProto() const {

// Returns a copy of model_proto_ whose graph field is replaced by the result of
// Graph::ToGraphProtoWithExternalInitializers on the owned graph.
// All threshold and alignment options (align_offset, align_threshold, allocation_granularity)
// are forwarded to the graph unchanged.
ModelProto Model::ToGraphProtoWithExternalInitializers(const std::filesystem::path& external_file_name,
                                                       const std::filesystem::path& file_path,
                                                       size_t initializer_size_threshold,
                                                       bool align_offset,
                                                       size_t align_threshold,
                                                       size_t allocation_granularity) const {
  ModelProto result(model_proto_);
  const auto& graph = *graph_;
  *(result.mutable_graph()) = graph.ToGraphProtoWithExternalInitializers(external_file_name,
                                                                         file_path,
                                                                         initializer_size_threshold,
                                                                         align_offset,
                                                                         align_threshold,
                                                                         allocation_granularity);
  return result;
}

Expand Down Expand Up @@ -605,14 +611,19 @@ template <typename T>
static Status SaveModelWithExternalInitializers(Model& model,
const T& file_path,
const std::filesystem::path& external_file_name,
size_t initializer_size_threshold) {
size_t initializer_size_threshold,
bool align_offset = FALSE,
size_t align_threshold = 1048576,
size_t allocation_granularity = 65536) {
int fd = 0;
Status status = Env::Default().FileOpenWr(file_path, fd);
ORT_RETURN_IF_ERROR(status);

ORT_TRY {
status = Model::SaveWithExternalInitializers(model, fd, file_path, external_file_name,
initializer_size_threshold);
initializer_size_threshold,
align_offset, align_threshold,
allocation_granularity);
}
ORT_CATCH(const std::exception& ex) {
ORT_HANDLE_EXCEPTION([&]() {
Expand Down Expand Up @@ -642,8 +653,12 @@ Status Model::Load(const PathString& file_path, std::shared_ptr<Model>& p_model,

// Path-based overload: forwards every argument, including the alignment options, to the file-local
// SaveModelWithExternalInitializers helper and returns its Status.
Status Model::SaveWithExternalInitializers(Model& model, const std::filesystem::path& file_path,
                                           const std::filesystem::path& external_file_name,
                                           size_t initializer_size_threshold,
                                           bool align_offset,
                                           size_t align_threshold,
                                           size_t allocation_granularity) {
  return SaveModelWithExternalInitializers(model, file_path, external_file_name, initializer_size_threshold,
                                           align_offset, align_threshold, allocation_granularity);
}

Status Model::LoadFromBytes(int count, void* p_bytes, /*out*/ ONNX_NAMESPACE::ModelProto& model_proto) {
Expand Down Expand Up @@ -759,15 +774,20 @@ Status Model::SaveWithExternalInitializers(Model& model,
int fd,
const std::filesystem::path& file_path,
const std::filesystem::path& external_file_name,
size_t initializer_size_threshold) {
size_t initializer_size_threshold,
bool align_offset,
size_t align_threshold,
size_t allocation_granularity) {
if (fd < 0) {
return Status(ONNXRUNTIME, INVALID_ARGUMENT, "<fd> is less than 0.");
}

ORT_RETURN_IF_ERROR(model.MainGraph().Resolve());

auto model_proto = model.ToGraphProtoWithExternalInitializers(external_file_name, file_path,
initializer_size_threshold);
initializer_size_threshold,
align_offset, align_threshold,
allocation_granularity);
google::protobuf::io::FileOutputStream output(fd);
const bool result = model_proto.SerializeToZeroCopyStream(&output) && output.Flush();
if (result) {
Expand Down
17 changes: 14 additions & 3 deletions onnxruntime/core/graph/model.h
Original file line number Diff line number Diff line change
Expand Up @@ -187,25 +187,36 @@ class Model {
// Get model's serialization proto data.
// Save initializer larger than the given threshold (in bytes) into an external binary file
// with the given name. This function is useful to avoid hitting the size limit of protobuf files.
// The initializer offset can optionally be page aligned and allocation granularity aligned for mmap support.

Check warning on line 190 in onnxruntime/core/graph/model.h

View workflow job for this annotation

GitHub Actions / Optional Lint C++

[cpplint] reported by reviewdog 🐶 Line ends in whitespace. Consider deleting these extra spaces. [whitespace/end_of_line] [4] Raw Output: onnxruntime/core/graph/model.h:190: Line ends in whitespace. Consider deleting these extra spaces. [whitespace/end_of_line] [4]
// align_offset: when true, offsets of initializers larger than align_threshold are aligned.
// align_threshold: size in bytes (default 1MB); only tensor data larger than this is aligned.
// allocation_granularity: allocation granularity in bytes used for the alignment (default 64KB).
ONNX_NAMESPACE::ModelProto ToGraphProtoWithExternalInitializers(
    const std::filesystem::path& external_file_name,
    const std::filesystem::path& file_path,
    size_t initializer_size_threshold,
    bool align_offset = false,  // standard literal; FALSE is a Windows-only macro
    size_t align_threshold = 1048576,
    size_t allocation_granularity = 65536) const;

static common::Status Save(Model& model, const PathString& file_path);

static common::Status Save(Model& model, int fd);

// Save the model to file using an external file for initializers larger than the given threshold (in bytes).
// The initializer offset can optionally be page aligned and allocation granularity aligned for mmap support:
//   align_offset: when true, offsets of initializers larger than align_threshold are aligned.
//   align_threshold: size in bytes (default 1MB); only tensor data larger than this is aligned.
//   allocation_granularity: allocation granularity in bytes used for the alignment (default 64KB).
static common::Status SaveWithExternalInitializers(Model& model,
                                                   const std::filesystem::path& file_path,
                                                   const std::filesystem::path& external_file_path,
                                                   size_t initializer_size_threshold,
                                                   bool align_offset = false,
                                                   size_t align_threshold = 1048576,
                                                   size_t allocation_granularity = 65536);

// Overload that serializes the model to the already-open file descriptor `fd`.
// A negative `fd` is rejected with an INVALID_ARGUMENT status.
//   align_offset: when true, offsets of initializers larger than align_threshold are aligned.
//   align_threshold: size in bytes (default 1MB); only tensor data larger than this is aligned.
//   allocation_granularity: allocation granularity in bytes used for the alignment (default 64KB).
static common::Status SaveWithExternalInitializers(Model& model,
                                                   int fd,
                                                   const std::filesystem::path& file_path,
                                                   const std::filesystem::path& external_file_path,
                                                   size_t initializer_size_threshold,
                                                   bool align_offset = false,
                                                   size_t align_threshold = 1048576,
                                                   size_t allocation_granularity = 65536);

static common::Status Load(std::istream& model_istream, ONNX_NAMESPACE::ModelProto* p_model_proto);

Expand Down
3 changes: 2 additions & 1 deletion onnxruntime/core/session/inference_session.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2055,7 +2055,8 @@ common::Status InferenceSession::Initialize() {
// Save the optimized model with external initializers, requesting aligned offsets for large
// initializers. Uses the standard `true` literal; TRUE is a Windows-only macro.
ORT_RETURN_IF_ERROR_SESSIONID_(Model::SaveWithExternalInitializers(
    *model_,
    session_options_.optimized_model_filepath,
    optimized_model_external_initializers_file_name,
    optimized_model_external_initializers_min_size_in_bytes,
    /*align_offset=*/true));

Check warning on line 2059 in onnxruntime/core/session/inference_session.cc

View workflow job for this annotation

GitHub Actions / Optional Lint C++

[cpplint] reported by reviewdog 🐶 Lines should be <= 120 characters long [whitespace/line_length] [2] Raw Output: onnxruntime/core/session/inference_session.cc:2059: Lines should be <= 120 characters long [whitespace/line_length] [2]
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,17 @@ Status LoadSaveAndCompareModel(const std::filesystem::path& input_onnx,
const std::filesystem::path& input_external_init_file,
const std::filesystem::path& output_onnx,
const std::filesystem::path& output_external_init_file,
size_t initializer_size_threshold) {
size_t initializer_size_threshold,
bool align_offset = false,
size_t align_threshold = 1,
size_t allocation_granularity = 4096) {
auto logger = DefaultLoggingManager().CreateLogger("LoadSaveAndCompareModel");
std::shared_ptr<Model> model;
ORT_RETURN_IF_ERROR(Model::Load(input_onnx, model, nullptr, *logger));
std::filesystem::remove(output_onnx);
std::filesystem::remove(output_external_init_file);
ORT_RETURN_IF_ERROR(Model::SaveWithExternalInitializers(*model, output_onnx, output_external_init_file, initializer_size_threshold));
ORT_RETURN_IF_ERROR(Model::SaveWithExternalInitializers(*model, output_onnx, output_external_init_file, initializer_size_threshold,
align_offset, align_threshold, allocation_granularity));

std::shared_ptr<Model> model_from_external;
ORT_RETURN_IF_ERROR(Model::Load(output_onnx.native(), model_from_external, nullptr, *logger));
Expand Down Expand Up @@ -75,6 +79,17 @@ Status LoadSaveAndCompareModel(const std::filesystem::path& input_onnx,

ORT_RETURN_IF_NOT(tensor_proto_size == from_external_tensor_proto_size, "size mismatch");
ORT_RETURN_IF_NOT(memcmp(tensor_proto_data.data(), from_external_tensor_proto_data.data(), tensor_proto_size) == 0, "data mismatch");

// When alignment was requested, check every "offset" entry in this tensor's external_data and
// fail if the recorded offset is not a multiple of allocation_granularity.
if (align_offset) {
for (const StringStringEntryProto& entry : from_external_tensor_proto->external_data()) {
if (entry.has_key() && entry.has_value() && entry.key() == "offset") {
size_t tensor_offset;
// The entry value is text; parse it back into an integer via a string stream.
std::stringstream stream(entry.value());
stream >> tensor_offset;
ORT_RETURN_IF_NOT(tensor_offset % allocation_granularity == 0, "tensor offset not align");
}
}
}
}
// Cleanup.
ORT_RETURN_IF_NOT(std::filesystem::remove(output_onnx), "delete file failed");
Expand All @@ -92,5 +107,10 @@ TEST(SaveWithExternalInitializers, ModelWithOriginalExternalData) {
ASSERT_STATUS_OK(LoadSaveAndCompareModel(ORT_TSTR("testdata/model_with_orig_ext_data.onnx"), ORT_TSTR("model_with_orig_ext_data.onnx.data"), ORT_TSTR("testdata/model_with_new_external_initializers.onnx"), ORT_TSTR("model_with_new_external_initializers.bin"), 0));
}

// Re-saves a model that already stores its initializers externally, this time with
// offset alignment enabled, and checks the round-trip via LoadSaveAndCompareModel.
TEST(SaveWithExternalInitializers, ModelWithOriginalExternalDataAlignOffset) {
  ASSERT_STATUS_OK(LoadSaveAndCompareModel(
      ORT_TSTR("testdata/model_with_orig_ext_data.onnx"),
      ORT_TSTR("model_with_orig_ext_data.onnx.data"),
      ORT_TSTR("testdata/model_with_new_external_initializers.onnx"),
      ORT_TSTR("model_with_new_external_initializers.bin"),
      /*initializer_size_threshold=*/0,
      /*align_offset=*/true));
}

} // namespace test
} // namespace onnxruntime

0 comments on commit 7069ec9

Please sign in to comment.