
[Build] Expose internal header files #23025

Open

naturliche opened this issue Dec 5, 2024 · 1 comment
Labels
build - build issues; typically submitted using template
platform:mobile - issues related to ONNX Runtime mobile; typically submitted using template
stale - issues that have not been addressed in a while; categorized by a bot

Comments


naturliche commented Dec 5, 2024

Describe the issue

When building and installing onnxruntime for debugging, I want to expose internal header files such as onnxruntime-main/onnxruntime/core/framework/fallback_cpu_capability.h. How should I do this?
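
For reference, a minimal CMake sketch of one way to point a consuming target at the internal headers. All paths and the target name are illustrative placeholders (ONNXRUNTIME_SRC stands in for a local checkout of onnxruntime-main), not settings taken from the actual project:

# Sketch only: internal headers are not installed, so they are taken from the source tree directly.
set(ONNXRUNTIME_SRC /path/to/onnxruntime-main)

add_executable(generate_subgrap generate_subgrap.cpp)

target_include_directories(generate_subgrap PRIVATE
    ${ONNXRUNTIME_SRC}/include/onnxruntime/core/session   # public API (onnxruntime_cxx_api.h)
    ${ONNXRUNTIME_SRC}/onnxruntime                         # internal include root (core/framework/..., core/graph/...)
    ${ONNXRUNTIME_SRC}/onnxruntime/core/framework          # so "fallback_cpu_capability.h" resolves as included below
)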

Urgency

No response

Target platform

x86_64-linux

Build script

#include "onnxruntime_cxx_api.h"
#include <iostream>
#include <fstream>
#include <string>
#include <vector>
#include <map>
#include "nlohmann/json.hpp"
#include "onnx/onnx.pb.h"  
#include "fallback_cpu_capability.h"

using json = nlohmann::json;

void save_model_to_file(const onnx::ModelProto& model, const std::string& output_path) {
    std::ofstream output_file(output_path, std::ios::binary);
    if (!output_file.is_open()) {
        throw std::runtime_error("Failed to open output file: " + output_path);
    }

    if (!model.SerializeToOstream(&output_file)) {
        throw std::runtime_error("Failed to serialize model to file: " + output_path);
    }

    output_file.close();
}
onnx::ModelProto load_model_and_weights(const std::string& structure_path, const std::string& weights_path) {
    onnx::ModelProto model_structure;
    std::ifstream structure_file(structure_path, std::ios::binary);
    if (!structure_file.is_open()) {
        throw std::runtime_error("Failed to open model structure file: " + structure_path);
    }
    model_structure.ParseFromIstream(&structure_file);
    structure_file.close();

    std::map<std::string, std::pair<std::vector<uint32_t>, std::vector<float>>> weights;


    std::ifstream weights_file(weights_path, std::ios::binary);
    if (!weights_file.is_open()) {
        throw std::runtime_error("Failed to open weights file: " + weights_path);
    }

    while (true) {
        uint32_t name_len;
        if (!weights_file.read(reinterpret_cast<char*>(&name_len), sizeof(name_len))) break;

        std::string name(name_len, '\0');
        if (!weights_file.read(&name[0], name_len)) break;

        uint32_t num_dims;
        if (!weights_file.read(reinterpret_cast<char*>(&num_dims), sizeof(num_dims))) break;

        std::vector<uint32_t> shape(num_dims);
        if (!weights_file.read(reinterpret_cast<char*>(shape.data()), num_dims * sizeof(uint32_t))) break;

        size_t num_elements = 1;
        for (auto dim : shape) {
            num_elements *= dim;
        }

        std::vector<float> data(num_elements);
        if (!weights_file.read(reinterpret_cast<char*>(data.data()), num_elements * sizeof(float))) break;

        weights[name] = std::make_pair(shape, data);
    }


    for (const auto& kv : weights) {
        const std::string& name = kv.first;
        const auto& [shape, data] = kv.second;
        onnx::TensorProto initializer;
        initializer.set_name(name);

  
        for (auto dim : shape) {
            initializer.add_dims(dim);
        }

        initializer.set_data_type(onnx::TensorProto_DataType_FLOAT);
        for (float value : data) {
            initializer.add_float_data(value);
        }

        if(name == "enc_embedding.value_embedding.tokenConv.weight"){
            std::cout<< name<<std::endl;
            for(int j = 0; j < 10;j++){
                std::cout<< data[j]<<",";
            }
        }

        model_structure.mutable_graph()->add_initializer()->CopyFrom(initializer);
    }
    std::string output_path = "/home/baum/postgres-test/test/generate-test-onnx/restore_model.onnx";

    save_model_to_file(model_structure, output_path);
    return model_structure;
}


std::map<std::string, int> load_configs_from_json(const std::string& config_path) {
    std::ifstream config_file(config_path);
    if (!config_file.is_open()) {
        throw std::runtime_error("Failed to open config file: " + config_path);
    }
    json j;
    config_file >> j;
    config_file.close();

    std::map<std::string, int> configs;
    configs["seq_len"] = j["seq_len"];
    configs["label_len"] = j["label_len"];
    configs["pred_len"] = j["pred_len"];
    configs["enc_in"] = j["enc_in"];
    configs["dec_in"] = j["dec_in"];
    configs["embed"] = j["embed"] == "timeF" ? 1 : 0;
    configs["freq"] = j["freq"] == "t" ? 4 : (j["freq"] == "h" ? 4 : (j["freq"] == "d" ? 3 : (j["freq"] == "b" ? 3 : (j["freq"] == "w" ? 2 : (j["freq"] == "m" ? 2 : 1)))));
    return configs;
}


std::map<std::string, std::vector<float>> prepare_data(const std::map<std::string, int>& configs) {
    int batch_size = 1; 
    int seq_len = configs.at("seq_len");
    int label_len = configs.at("label_len");
    int pred_len = configs.at("pred_len");
    int enc_in = configs.at("enc_in");
    int dec_in = configs.at("dec_in");
    int time_feature_dim = configs.at("freq");

    std::vector<float> x_enc(batch_size * seq_len * enc_in, 1); 
    std::vector<float> x_mark_enc;
    if (time_feature_dim > 0) {
        x_mark_enc.resize(batch_size * seq_len * time_feature_dim, 0.5);  
    }
    std::vector<float> x_dec(batch_size * (label_len + pred_len) * dec_in, 1);  
    std::vector<float> x_mark_dec;
    if (time_feature_dim > 0) {
        x_mark_dec.resize(batch_size * (label_len + pred_len) * time_feature_dim, 0.5); 
    }

    std::map<std::string, std::vector<float>> inputs = {
        {"x_enc", x_enc},
    };
    if (time_feature_dim > 0) {
        inputs["x_mark_enc"] = x_mark_enc;
    }
    inputs["x_dec"] = x_dec;
    if (time_feature_dim > 0) {
        inputs["x_mark_dec"] = x_mark_dec;
    }

    return inputs;
}
void saveOutputsToFile(const std::vector<std::vector<float>>& outputs, const std::string& filename) {
    std::ofstream file(filename, std::ios::binary);
    if (!file.is_open()) {
        std::cerr << "Failed to open file: " << filename << std::endl;
        return;
    }


    size_t num_outputs = outputs.size();
    file.write(reinterpret_cast<const char*>(&num_outputs), sizeof(num_outputs));


    for (const auto& output : outputs) {
        size_t output_size = output.size();
        file.write(reinterpret_cast<const char*>(&output_size), sizeof(output_size));
        file.write(reinterpret_cast<const char*>(output.data()), output_size * sizeof(float));
    }

    file.close();
}


void saveModelToFile(const onnx::ModelProto& model, const std::string& filename) {
    std::ofstream file(filename, std::ios::binary);
    if (!file.is_open()) {
        std::cerr << "Failed to open file: " << filename << std::endl;
        return;
    }


    std::string model_str;
    if (!model.SerializeToString(&model_str)) {
        std::cerr << "Failed to serialize model to string." << std::endl;
        return;
    }


    file.write(model_str.c_str(), model_str.size());
    if (!file) {
        std::cerr << "Failed to write model to file: " << filename << std::endl;
        return;
    }

    file.close();
    std::cout << "Model saved to file: " << filename << std::endl;
}

std::vector<std::vector<float>> run_inference_from_memory(const onnx::ModelProto& model, const std::map<std::string, std::vector<float>>& inputs, const std::map<std::string, int>& configs,bool use_gpu) {

    std::string model_bytes;
    model.SerializeToString(&model_bytes);

  
    Ort::Env env(ORT_LOGGING_LEVEL_VERBOSE, "Test");
    Ort::SessionOptions session_options;

    session_options.SetIntraOpNumThreads(0); 
    session_options.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_EXTENDED);
    if (use_gpu) {

        OrtCUDAProviderOptions cuda_options;
        cuda_options.device_id = 0; 
        session_options.AppendExecutionProvider_CUDA(cuda_options);

    }

    Ort::Session session(env, model_bytes.c_str(), model_bytes.size(), session_options);


    const auto& graph = model.graph();
    size_t num_inputs = graph.input_size();
    size_t num_outputs = graph.output_size();

    std::vector<const char*> input_names(num_inputs);
    std::vector<const char*> output_names(num_outputs);

    for (size_t i = 0; i < num_inputs; ++i) {
        input_names[i] = graph.input(i).name().c_str();
    }

    for (size_t i = 0; i < num_outputs; ++i) {
        output_names[i] = graph.output(i).name().c_str();
    }


    std::vector<Ort::Value> input_tensors;
    for (const auto& input_name : input_names) {
        auto it = inputs.find(input_name);
        if (it == inputs.end()) {
            throw std::runtime_error("Input not found: " + std::string(input_name));
        }

        const std::string& name = it->first;
        const std::vector<float>& data = it->second;

        std::vector<int64_t> shape;
        if (name == "x_enc") {
            shape = {1, configs.at("seq_len"), configs.at("enc_in")};
        } else if (name == "x_mark_enc") {
            shape = {1, configs.at("seq_len"), configs.at("freq")};
        } else if (name == "x_dec") {
            shape = {1, configs.at("label_len") + configs.at("pred_len"), configs.at("dec_in")};
        } else if (name == "x_mark_dec") {
            shape = {1, configs.at("label_len") + configs.at("pred_len"), configs.at("freq")};
        } else {
            throw std::runtime_error("Unknown input name: " + name);
        }


        std::cout << "Input tensor shape for " << name << ": [";
        for (size_t i = 0; i < shape.size(); ++i) {
            std::cout << shape[i] << (i < shape.size() - 1 ? ", " : "");
        }
        std::cout << "]" << std::endl;

        Ort::MemoryInfo memory_info = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);
        Ort::Value input_tensor = Ort::Value::CreateTensor<float>(memory_info, const_cast<float*>(data.data()), data.size(), shape.data(), shape.size());


        auto tensor_info = input_tensor.GetTensorTypeAndShapeInfo();
        auto actual_shape = tensor_info.GetShape();

        input_tensors.push_back(std::move(input_tensor));
    }


    std::vector<Ort::Value> output_tensors = session.Run(Ort::RunOptions{nullptr}, input_names.data(), input_tensors.data(), input_tensors.size(), output_names.data(), num_outputs);


    std::vector<std::vector<float>> outputs;
    for (auto& output_tensor : output_tensors) {
        float* output_data = output_tensor.GetTensorMutableData<float>();
        size_t output_size = output_tensor.GetTensorTypeAndShapeInfo().GetElementCount();
        std::vector<float> output(output_data, output_data + output_size);
        outputs.push_back(output);
    }

    std::string filename = "outputs.bin";
    saveOutputsToFile(outputs, filename);


    std::string output_filename = "model_output.onnx";
    saveModelToFile(model, output_filename);
    return outputs;
}

std::pair<onnx::ModelProto, onnx::ModelProto> split_model_into_subgraphs(const onnx::ModelProto& model) {

    onnx::ModelProto cpu_subgraph = model;
    onnx::ModelProto gpu_subgraph = model;


    cpu_subgraph.mutable_graph()->clear_node();
    gpu_subgraph.mutable_graph()->clear_node();


    const auto& graph = model.graph();


    // GetCpuPreferredNodes is declared in the internal header
    // core/framework/fallback_cpu_capability.h; GraphViewer and NodeIndex are
    // likewise onnxruntime internals. create_graph_viewer and kernel_lookup are
    // placeholders that are not defined in this snippet.
    onnxruntime::GraphViewer graph_viewer = create_graph_viewer(graph);

    std::vector<NodeIndex> tentative_nodes;
    for (int i = 0; i < graph.node_size(); ++i) {
        tentative_nodes.push_back(i);
    }

    auto cpu_nodes = GetCpuPreferredNodes(graph_viewer, kernel_lookup, gsl::make_span(tentative_nodes));

   
    for (int i = 0; i < graph.node_size(); ++i) {
        const auto& node = graph.node(i);

        if (cpu_nodes.find(i) != cpu_nodes.end()) {
   
            *cpu_subgraph.mutable_graph()->add_node() = node;
        } else {

            *gpu_subgraph.mutable_graph()->add_node() = node;
        }
    }

    return {cpu_subgraph, gpu_subgraph};
}


void process_model(const onnx::ModelProto& model, const std::string& cpu_model_path, const std::string& gpu_model_path) {


    auto [cpu_subgraph, gpu_subgraph] = split_model_into_subgraphs(model);


    save_model_to_file(cpu_subgraph, cpu_model_path);


    save_model_to_file(gpu_subgraph, gpu_model_path);

    std::cout << "Model split and saved successfully!" << std::endl;
}


int main() {
    try {

        std::string config_path = "/home/baum/postgres-test/test/generate-test-onnx/config.json";
        std::map<std::string, int> configs = load_configs_from_json(config_path);

      
        auto inputs = prepare_data(configs);


        std::string structure_output_path = "/home/baum/postgres-test/test/generate-test-onnx/model_structure.onnx";
        std::string weights_output_path = "/home/baum/postgres-test/test/generate-test-onnx/model_weights.npz";


        onnx::ModelProto restored_model = load_model_and_weights(structure_output_path, weights_output_path);

        process_model(restored_model, "cpu_subgraph.onnx", "gpu_subgraph.onnx");


        auto outputs = run_inference_from_memory(restored_model, inputs, configs, true);

        // std::string path = "/home/baum/postgres-test/test/generate-test-onnx/transformer_model.onnx";
        // onnx::ModelProto model_structure;
        // std::ifstream structure_file(path, std::ios::binary);
        // model_structure.ParseFromIstream(&structure_file);
        // structure_file.close();
        
        
        for (const auto& output : outputs) {
            for (const auto& value : output) {
                std::cout << value << " ";
            }
            std::cout << std::endl;
        }
    } catch (const std::exception& e) {
        std::cerr << "Error: " << e.what() << std::endl;
        return 1;
    }
    
    return 0;
}

Error / output

[ 33%] Building CXX object CMakeFiles/generate_subgrap.dir/generate_subgrap.cpp.o
In file included from /home/baum/postgres-test/test/generate-test-onnx/generate_subgrap.cpp:9:
/home/baum/postgres-test/onnxruntime-main/onnxruntime/core/framework/fallback_cpu_capability.h:6:10: fatal error: gsl/gsl: No such file or directory
6 | #include <gsl/gsl>
| ^~~~~~~~~
compilation terminated.
make[2]: *** [CMakeFiles/generate_subgrap.dir/build.make:76: CMakeFiles/generate_subgrap.dir/generate_subgrap.cpp.o] Error 1
make[1]: *** [CMakeFiles/Makefile2:83: CMakeFiles/generate_subgrap.dir/all] Error 2
make: *** [Makefile:91: all] Error 2
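
The missing gsl/gsl header comes from Microsoft GSL, which onnxruntime normally supplies through its own build dependencies (the exact location under the build tree varies by version). When compiling against the source tree from an external CMake project, one option is to fetch GSL directly; a sketch only, with an illustrative tag and the target name taken from the error output above:

include(FetchContent)
FetchContent_Declare(
    GSL
    GIT_REPOSITORY https://github.com/microsoft/GSL.git
    GIT_TAG        v4.0.0   # illustrative tag
)
FetchContent_MakeAvailable(GSL)

target_link_libraries(generate_subgrap PRIVATE Microsoft.GSL::GSL)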

Visual Studio Version

vscode

GCC / Compiler Version

GNU 9.4.0

naturliche added the build label Dec 5, 2024
github-actions bot added the platform:mobile label Dec 5, 2024
github-actions bot commented Jan 5, 2025

This issue has been automatically marked as stale due to inactivity and will be closed in 30 days if no further activity occurs. If further support is needed, please provide an update and/or more details.

github-actions bot added the stale label Jan 5, 2025