
[Build] Expose internal header files #23025

Open

naturliche opened this issue Dec 5, 2024 · 1 comment
Labels
build - build issues; typically submitted using template
platform:mobile - issues related to ONNX Runtime mobile; typically submitted using template
stale - issues that have not been addressed in a while; categorized by a bot

Comments


naturliche commented Dec 5, 2024

Describe the issue

When building and installing onnxruntime for debugging, I want to expose internal header files such as onnxruntime-main/onnxruntime/core/framework/fallback_cpu_capability.h. How should I do this?
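
For reference, a minimal CMake sketch of one way to point a consuming target at the internal headers. All paths and the target name are illustrative placeholders (ONNXRUNTIME_SRC stands in for a local checkout of onnxruntime-main), not settings taken from the actual project:

# Sketch only: internal headers are not installed, so they are taken from the source tree directly.
set(ONNXRUNTIME_SRC /path/to/onnxruntime-main)

add_executable(generate_subgrap generate_subgrap.cpp)

target_include_directories(generate_subgrap PRIVATE
    ${ONNXRUNTIME_SRC}/include/onnxruntime/core/session   # public API (onnxruntime_cxx_api.h)
    ${ONNXRUNTIME_SRC}/onnxruntime                         # internal include root (core/framework/..., core/graph/...)
    ${ONNXRUNTIME_SRC}/onnxruntime/core/framework          # so "fallback_cpu_capability.h" resolves as included below
)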

Urgency

No response

Target platform

x86_64-linux

Build script

#include "onnxruntime_cxx_api.h"
#include <iostream>
#include <fstream>
#include <string>
#include <vector>
#include <map>
#include "nlohmann/json.hpp"
#include "onnx/onnx.pb.h"  
#include "fallback_cpu_capability.h"

using json = nlohmann::json;

void save_model_to_file(const onnx::ModelProto& model, const std::string& output_path) {
    std::ofstream output_file(output_path, std::ios::binary);
    if (!output_file.is_open()) {
        throw std::runtime_error("Failed to open output file: " + output_path);
    }

    if (!model.SerializeToOstream(&output_file)) {
        throw std::runtime_error("Failed to serialize model to file: " + output_path);
    }

    output_file.close();
}
onnx::ModelProto load_model_and_weights(const std::string& structure_path, const std::string& weights_path) {
    onnx::ModelProto model_structure;
    std::ifstream structure_file(structure_path, std::ios::binary);
    if (!structure_file.is_open()) {
        throw std::runtime_error("Failed to open model structure file: " + structure_path);
    }
    model_structure.ParseFromIstream(&structure_file);
    structure_file.close();

    std::map<std::string, std::pair<std::vector<uint32_t>, std::vector<float>>> weights;


    std::ifstream weights_file(weights_path, std::ios::binary);
    if (!weights_file.is_open()) {
        throw std::runtime_error("Failed to open weights file: " + weights_path);
    }

    while (true) {
        uint32_t name_len;
        if (!weights_file.read(reinterpret_cast<char*>(&name_len), sizeof(name_len))) break;

        std::string name(name_len, '\0');
        if (!weights_file.read(&name[0], name_len)) break;

        uint32_t num_dims;
        if (!weights_file.read(reinterpret_cast<char*>(&num_dims), sizeof(num_dims))) break;

        std::vector<uint32_t> shape(num_dims);
        if (!weights_file.read(reinterpret_cast<char*>(shape.data()), num_dims * sizeof(uint32_t))) break;

        size_t num_elements = 1;
        for (auto dim : shape) {
            num_elements *= dim;
        }

        std::vector<float> data(num_elements);
        if (!weights_file.read(reinterpret_cast<char*>(data.data()), num_elements * sizeof(float))) break;

        weights[name] = std::make_pair(shape, data);
    }


    for (const auto& kv : weights) {
        const std::string& name = kv.first;
        const auto& [shape, data] = kv.second;
        onnx::TensorProto initializer;
        initializer.set_name(name);

  
        for (auto dim : shape) {
            initializer.add_dims(dim);
        }

        initializer.set_data_type(onnx::TensorProto_DataType_FLOAT);
        for (float value : data) {
            initializer.add_float_data(value);
        }

        if(name == "enc_embedding.value_embedding.tokenConv.weight"){
            std::cout<< name<<std::endl;
            for(int j = 0; j < 10;j++){
                std::cout<< data[j]<<",";
            }
        }

        model_structure.mutable_graph()->add_initializer()->CopyFrom(initializer);
    }
    std::string output_path = "/home/baum/postgres-test/test/generate-test-onnx/restore_model.onnx";

    save_model_to_file(model_structure, output_path);
    return model_structure;
}


std::map<std::string, int> load_configs_from_json(const std::string& config_path) {
    std::ifstream config_file(config_path);
    if (!config_file.is_open()) {
        throw std::runtime_error("Failed to open config file: " + config_path);
    }
    json j;
    config_file >> j;
    config_file.close();

    std::map<std::string, int> configs;
    configs["seq_len"] = j["seq_len"];
    configs["label_len"] = j["label_len"];
    configs["pred_len"] = j["pred_len"];
    configs["enc_in"] = j["enc_in"];
    configs["dec_in"] = j["dec_in"];
    configs["embed"] = j["embed"] == "timeF" ? 1 : 0;
    configs["freq"] = j["freq"] == "t" ? 4 : (j["freq"] == "h" ? 4 : (j["freq"] == "d" ? 3 : (j["freq"] == "b" ? 3 : (j["freq"] == "w" ? 2 : (j["freq"] == "m" ? 2 : 1)))));
    return configs;
}


std::map<std::string, std::vector<float>> prepare_data(const std::map<std::string, int>& configs) {
    int batch_size = 1; 
    int seq_len = configs.at("seq_len");
    int label_len = configs.at("label_len");
    int pred_len = configs.at("pred_len");
    int enc_in = configs.at("enc_in");
    int dec_in = configs.at("dec_in");
    int time_feature_dim = configs.at("freq");

    std::vector<float> x_enc(batch_size * seq_len * enc_in, 1); 
    std::vector<float> x_mark_enc;
    if (time_feature_dim > 0) {
        x_mark_enc.resize(batch_size * seq_len * time_feature_dim, 0.5);  
    }
    std::vector<float> x_dec(batch_size * (label_len + pred_len) * dec_in, 1);  
    std::vector<float> x_mark_dec;
    if (time_feature_dim > 0) {
        x_mark_dec.resize(batch_size * (label_len + pred_len) * time_feature_dim, 0.5); 
    }

    std::map<std::string, std::vector<float>> inputs = {
        {"x_enc", x_enc},
    };
    if (time_feature_dim > 0) {
        inputs["x_mark_enc"] = x_mark_enc;
    }
    inputs["x_dec"] = x_dec;
    if (time_feature_dim > 0) {
        inputs["x_mark_dec"] = x_mark_dec;
    }

    return inputs;
}
void saveOutputsToFile(const std::vector<std::vector<float>>& outputs, const std::string& filename) {
    std::ofstream file(filename, std::ios::binary);
    if (!file.is_open()) {
        std::cerr << "Failed to open file: " << filename << std::endl;
        return;
    }


    size_t num_outputs = outputs.size();
    file.write(reinterpret_cast<const char*>(&num_outputs), sizeof(num_outputs));


    for (const auto& output : outputs) {
        size_t output_size = output.size();
        file.write(reinterpret_cast<const char*>(&output_size), sizeof(output_size));
        file.write(reinterpret_cast<const char*>(output.data()), output_size * sizeof(float));
    }

    file.close();
}


void saveModelToFile(const onnx::ModelProto& model, const std::string& filename) {
    std::ofstream file(filename, std::ios::binary);
    if (!file.is_open()) {
        std::cerr << "Failed to open file: " << filename << std::endl;
        return;
    }


    std::string model_str;
    if (!model.SerializeToString(&model_str)) {
        std::cerr << "Failed to serialize model to string." << std::endl;
        return;
    }


    file.write(model_str.c_str(), model_str.size());
    if (!file) {
        std::cerr << "Failed to write model to file: " << filename << std::endl;
        return;
    }

    file.close();
    std::cout << "Model saved to file: " << filename << std::endl;
}

std::vector<std::vector<float>> run_inference_from_memory(const onnx::ModelProto& model, const std::map<std::string, std::vector<float>>& inputs, const std::map<std::string, int>& configs,bool use_gpu) {

    std::string model_bytes;
    model.SerializeToString(&model_bytes);

  
    Ort::Env env(ORT_LOGGING_LEVEL_VERBOSE, "Test");
    Ort::SessionOptions session_options;

    session_options.SetIntraOpNumThreads(0); 
    session_options.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_EXTENDED);
    if (use_gpu) {

        OrtCUDAProviderOptions cuda_options;
        cuda_options.device_id = 0; 
        session_options.AppendExecutionProvider_CUDA(cuda_options);

    }

    Ort::Session session(env, model_bytes.c_str(), model_bytes.size(), session_options);


    const auto& graph = model.graph();
    size_t num_inputs = graph.input_size();
    size_t num_outputs = graph.output_size();

    std::vector<const char*> input_names(num_inputs);
    std::vector<const char*> output_names(num_outputs);

    for (size_t i = 0; i < num_inputs; ++i) {
        input_names[i] = graph.input(i).name().c_str();
    }

    for (size_t i = 0; i < num_outputs; ++i) {
        output_names[i] = graph.output(i).name().c_str();
    }


    std::vector<Ort::Value> input_tensors;
    for (const auto& input_name : input_names) {
        auto it = inputs.find(input_name);
        if (it == inputs.end()) {
            throw std::runtime_error("Input not found: " + std::string(input_name));
        }

        const std::string& name = it->first;
        const std::vector<float>& data = it->second;

        std::vector<int64_t> shape;
        if (name == "x_enc") {
            shape = {1, configs.at("seq_len"), configs.at("enc_in")};
        } else if (name == "x_mark_enc") {
            shape = {1, configs.at("seq_len"), configs.at("freq")};
        } else if (name == "x_dec") {
            shape = {1, configs.at("label_len") + configs.at("pred_len"), configs.at("dec_in")};
        } else if (name == "x_mark_dec") {
            shape = {1, configs.at("label_len") + configs.at("pred_len"), configs.at("freq")};
        } else {
            throw std::runtime_error("Unknown input name: " + name);
        }


        std::cout << "Input tensor shape for " << name << ": [";
        for (size_t i = 0; i < shape.size(); ++i) {
            std::cout << shape[i] << (i < shape.size() - 1 ? ", " : "");
        }
        std::cout << "]" << std::endl;

        Ort::MemoryInfo memory_info = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);
        Ort::Value input_tensor = Ort::Value::CreateTensor<float>(memory_info, const_cast<float*>(data.data()), data.size(), shape.data(), shape.size());


        auto tensor_info = input_tensor.GetTensorTypeAndShapeInfo();
        auto actual_shape = tensor_info.GetShape();

        input_tensors.push_back(std::move(input_tensor));
    }


    std::vector<Ort::Value> output_tensors = session.Run(Ort::RunOptions{nullptr}, input_names.data(), input_tensors.data(), input_tensors.size(), output_names.data(), num_outputs);


    std::vector<std::vector<float>> outputs;
    for (auto& output_tensor : output_tensors) {
        float* output_data = output_tensor.GetTensorMutableData<float>();
        size_t output_size = output_tensor.GetTensorTypeAndShapeInfo().GetElementCount();
        std::vector<float> output(output_data, output_data + output_size);
        outputs.push_back(output);
    }

    std::string filename = "outputs.bin";
    saveOutputsToFile(outputs, filename);


    std::string output_filename = "model_output.onnx";
    saveModelToFile(model, output_filename);
    return outputs;
}

std::pair<onnx::ModelProto, onnx::ModelProto> split_model_into_subgraphs(const onnx::ModelProto& model) {

    onnx::ModelProto cpu_subgraph = model;
    onnx::ModelProto gpu_subgraph = model;


    cpu_subgraph.mutable_graph()->clear_node();
    gpu_subgraph.mutable_graph()->clear_node();


    const auto& graph = model.graph();


    // GetCpuPreferredNodes is declared in the internal header
    // core/framework/fallback_cpu_capability.h; GraphViewer and NodeIndex are
    // likewise onnxruntime internals. create_graph_viewer and kernel_lookup are
    // placeholders that are not defined in this snippet.
    onnxruntime::GraphViewer graph_viewer = create_graph_viewer(graph);

    std::vector<NodeIndex> tentative_nodes;
    for (int i = 0; i < graph.node_size(); ++i) {
        tentative_nodes.push_back(i);
    }

    auto cpu_nodes = GetCpuPreferredNodes(graph_viewer, kernel_lookup, gsl::make_span(tentative_nodes));

   
    for (int i = 0; i < graph.node_size(); ++i) {
        const auto& node = graph.node(i);

        if (cpu_nodes.find(i) != cpu_nodes.end()) {
   
            *cpu_subgraph.mutable_graph()->add_node() = node;
        } else {

            *gpu_subgraph.mutable_graph()->add_node() = node;
        }
    }

    return {cpu_subgraph, gpu_subgraph};
}


void process_model(const onnx::ModelProto& model, const std::string& cpu_model_path, const std::string& gpu_model_path) {


    auto [cpu_subgraph, gpu_subgraph] = split_model_into_subgraphs(model);


    save_model_to_file(cpu_subgraph, cpu_model_path);


    save_model_to_file(gpu_subgraph, gpu_model_path);

    std::cout << "Model split and saved successfully!" << std::endl;
}


int main() {
    try {

        std::string config_path = "/home/baum/postgres-test/test/generate-test-onnx/config.json";
        std::map<std::string, int> configs = load_configs_from_json(config_path);

      
        auto inputs = prepare_data(configs);


        std::string structure_output_path = "/home/baum/postgres-test/test/generate-test-onnx/model_structure.onnx";
        std::string weights_output_path = "/home/baum/postgres-test/test/generate-test-onnx/model_weights.npz";


        onnx::ModelProto restored_model = load_model_and_weights(structure_output_path, weights_output_path);

        process_model(restored_model, "cpu_subgraph.onnx", "gpu_subgraph.onnx");


        auto outputs = run_inference_from_memory(restored_model, inputs, configs, true);

        // std::string path = "/home/baum/postgres-test/test/generate-test-onnx/transformer_model.onnx";
        // onnx::ModelProto model_structure;
        // std::ifstream structure_file(path, std::ios::binary);
        // model_structure.ParseFromIstream(&structure_file);
        // structure_file.close();
        
        
        for (const auto& output : outputs) {
            for (const auto& value : output) {
                std::cout << value << " ";
            }
            std::cout << std::endl;
        }
    } catch (const std::exception& e) {
        std::cerr << "Error: " << e.what() << std::endl;
        return 1;
    }
    
    return 0;
}

Error / output

[ 33%] Building CXX object CMakeFiles/generate_subgrap.dir/generate_subgrap.cpp.o
In file included from /home/baum/postgres-test/test/generate-test-onnx/generate_subgrap.cpp:9:
/home/baum/postgres-test/onnxruntime-main/onnxruntime/core/framework/fallback_cpu_capability.h:6:10: fatal error: gsl/gsl: No such file or directory
6 | #include <gsl/gsl>
| ^~~~~~~~~
compilation terminated.
make[2]: *** [CMakeFiles/generate_subgrap.dir/build.make:76: CMakeFiles/generate_subgrap.dir/generate_subgrap.cpp.o] Error 1
make[1]: *** [CMakeFiles/Makefile2:83: CMakeFiles/generate_subgrap.dir/all] Error 2
make: *** [Makefile:91: all] Error 2
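
The missing gsl/gsl header comes from Microsoft GSL, which onnxruntime normally supplies through its own build dependencies (the exact location under the build tree varies by version). When compiling against the source tree from an external CMake project, one option is to fetch GSL directly; a sketch only, with an illustrative tag and the target name taken from the error output above:

include(FetchContent)
FetchContent_Declare(
    GSL
    GIT_REPOSITORY https://github.com/microsoft/GSL.git
    GIT_TAG        v4.0.0   # illustrative tag
)
FetchContent_MakeAvailable(GSL)

target_link_libraries(generate_subgrap PRIVATE Microsoft.GSL::GSL)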

Visual Studio Version

vscode

GCC / Compiler Version

GNU 9.4.0

naturliche added the build label Dec 5, 2024
github-actions bot added the platform:mobile label Dec 5, 2024
github-actions bot commented Jan 5, 2025

This issue has been automatically marked as stale due to inactivity and will be closed in 30 days if no further activity occurs. If further support is needed, please provide an update and/or more details.

github-actions bot added the stale label Jan 5, 2025