From 00dc8a9c543ef2836f30e2eb6a4a6a98de69b4bb Mon Sep 17 00:00:00 2001 From: Hyukjin Jeong Date: Tue, 29 Oct 2024 09:55:58 +0900 Subject: [PATCH] [record-minmax] Tidy RecordMinMax (#14267) This tidies RecordMinMax source code. - Remove unused code - Use proper test name ONE-DCO-1.0-Signed-off-by: Hyukjin Jeong --- compiler/record-minmax/CMakeLists.txt | 8 +- compiler/record-minmax/include/RecordMinMax.h | 9 - compiler/record-minmax/src/RecordMinMax.cpp | 417 ------------------ 3 files changed, 4 insertions(+), 430 deletions(-) diff --git a/compiler/record-minmax/CMakeLists.txt b/compiler/record-minmax/CMakeLists.txt index 6755de36eda..db02fee7625 100644 --- a/compiler/record-minmax/CMakeLists.txt +++ b/compiler/record-minmax/CMakeLists.txt @@ -64,7 +64,7 @@ set(TEST_SOURCES file(GLOB_RECURSE TESTS "tests/*.test.cpp") nnas_find_package(GTest REQUIRED) -GTest_AddTest(record_minmax_function_test ${TESTS} ${TEST_SOURCES}) -target_include_directories(record_minmax_function_test PRIVATE include) -target_link_libraries(record_minmax_function_test luci_lang) -target_link_libraries(record_minmax_function_test nncc_coverage) +GTest_AddTest(record_minmax_unittest ${TESTS} ${TEST_SOURCES}) +target_include_directories(record_minmax_unittest PRIVATE include) +target_link_libraries(record_minmax_unittest luci_lang) +target_link_libraries(record_minmax_unittest nncc_coverage) diff --git a/compiler/record-minmax/include/RecordMinMax.h b/compiler/record-minmax/include/RecordMinMax.h index b11898c9640..e6b289f361b 100644 --- a/compiler/record-minmax/include/RecordMinMax.h +++ b/compiler/record-minmax/include/RecordMinMax.h @@ -71,15 +71,6 @@ class RecordMinMax void profileDataInParallel(const std::string &input_data_path); -// TODO Remove unused code -#if 0 - void profileRawData(const std::string &input_data_path); - - void profileRawDataDirectory(const std::string &input_data_path); - - void profileDataWithRandomInputs(void); -#endif - void saveModel(const std::string &output_model_path); private: diff --git a/compiler/record-minmax/src/RecordMinMax.cpp b/compiler/record-minmax/src/RecordMinMax.cpp index 1c646b11b9d..9069a8adfaa 100644 --- a/compiler/record-minmax/src/RecordMinMax.cpp +++ b/compiler/record-minmax/src/RecordMinMax.cpp @@ -36,39 +36,6 @@ using DataType = loco::DataType; namespace { -// TODO Remove unused code -#if 0 -// Return a string with no whitespace from both ends -std::string trim(std::string s) -{ - // Trim left side - s.erase(s.begin(), - std::find_if(s.begin(), s.end(), [](unsigned char ch) { return !std::isspace(ch); })); - - // Trim right side - s.erase( - std::find_if(s.rbegin(), s.rend(), [](unsigned char ch) { return !std::isspace(ch); }).base(), - s.end()); - - return s; -} - -std::vector parse_line(const std::string &line) -{ - auto trimmed = trim(line); - std::stringstream ss(trimmed); - - std::vector res; - - std::string filename; - while (getline(ss, filename, ' ')) - { - res.emplace_back(filename); - } - return res; -} -#endif - // Max h5 file size for parallel recording in bytes = 1 GB const long h5_max_size_bytes = 1000000000; @@ -80,82 +47,6 @@ long getH5FileSize(const std::string &input_data_path) return in_file.tellg(); } -// TODO Remove unused code -#if 0 -uint32_t numElements(const luci::CircleNode *node) -{ - uint32_t num_elements = 1; - for (uint32_t i = 0; i < node->rank(); i++) - num_elements *= node->dim(i).value(); - - return num_elements; -} - -// Throw exception if input has one of the following conditions. -// 1. Have unknown dimension -// 2. Number of elements is 0 -void checkInputDimension(const luci::CircleInput *input) -{ - for (uint32_t i = 0; i < input->rank(); i++) - if (!input->dim(i).known()) - throw std::runtime_error(input->name() + " has unknown dimension"); - - if (numElements(input) == 0) - throw std::runtime_error(input->name() + " is a zero-sized input"); -} - -void readDataFromFile(const std::string &filename, std::vector &data, size_t data_size) -{ - assert(data.size() == data_size); // FIX_CALLER_UNLESS - - std::ifstream fs(filename, std::ifstream::binary); - if (fs.fail()) - throw std::runtime_error("Cannot open file \"" + filename + "\".\n"); - if (fs.read(data.data(), data_size).fail()) - throw std::runtime_error("Failed to read data from file \"" + filename + "\".\n"); - if (fs.peek() != EOF) - throw std::runtime_error("Input tensor size mismatches with \"" + filename + "\".\n"); -} - -std::vector genRandomBoolData(std::mt19937 &gen, uint32_t num_elements) -{ - std::uniform_int_distribution<> dist(0, 1); - std::vector input_data(num_elements); - - // Write random data - for (auto &iter : input_data) - iter = static_cast(dist(gen)); - - return input_data; -} - -template -std::vector genRandomIntData(std::mt19937 &gen, uint32_t num_elements, T min, T max) -{ - std::uniform_int_distribution dist(min, max); - std::vector input_data(num_elements); - - // Write random data - { - auto const generator = [&gen, &dist]() { return dist(gen); }; - std::generate(begin(input_data), end(input_data), generator); - } - - return input_data; -} - -/** - * @brief getTensorSize will return size in bytes - */ -template size_t getTensorSize(const NodeT *node) -{ - uint32_t tensor_size = luci::size(node->dtype()); - for (uint32_t i = 0; i < node->rank(); ++i) - tensor_size *= node->dim(i).value(); - return tensor_size; -} -#endif - /** * @brief verifyTypeShape checks the type and the shape of CircleInput * This throws an exception if type or shape does not match @@ -209,172 +100,6 @@ void RecordMinMax::initialize(const std::string &input_model_path) } } -// TODO Remove unused code -#if 0 - -// input_data_path is a path to the directory -// The directory should contain binary files each of which is a raw data, -// ready to be consumed by the input circle model without any modification -// TODO reduce duplicate codes with profileRawData -void RecordMinMax::profileRawDataDirectory(const std::string &input_data_path) -{ - struct dirent *entry = nullptr; - DIR *dp = nullptr; - - dp = opendir(input_data_path.c_str()); - if (not dp) - throw std::runtime_error("Cannot open directory. Please check \"" + input_data_path + - "\" is a directory.\n"); - - uint32_t num_records = 0; - const auto input_nodes = loco::input_nodes(_module->graph()); - - // Get total input size - uint32_t total_input_size = 0; - for (auto input : input_nodes) - { - const auto *input_node = loco::must_cast(input); - checkInputDimension(input_node); - total_input_size += getTensorSize(input_node); - } - - while ((entry = readdir(dp))) - { - // Skip if the entry is not a regular file - if (entry->d_type != DT_REG) - continue; - - const std::string filename = entry->d_name; - std::cout << "Recording " << num_records << "'th data" << std::endl; - - // Read data from file to buffer - // Assumption: For a multi-input model, the binary file should have inputs concatenated in the - // same order with the input index. - std::vector input_data(total_input_size); - readDataFromFile(input_data_path + "/" + filename, input_data, total_input_size); - - // Write data from buffer to interpreter - uint32_t offset = 0; - for (auto input : input_nodes) - { - const auto *input_node = loco::must_cast(input); - const auto input_size = getTensorSize(input_node); - getInterpreter()->writeInputTensor(input_node, input_data.data() + offset, input_size); - - offset += input_size; - } - - getInterpreter()->interpret(); - - num_records++; - } - - closedir(dp); - - if (num_records == 0) - throw std::runtime_error("The input data file does not contain any record."); - - std::cout << "Recording finished. Number of recorded data: " << num_records << std::endl; - - _minmax_computer->update_qparam(getObserver()->minMaxData()->getMap()); -} - -// input_data_path is a text file which specifies the representative data -// The text file should contain absolute file path per line. -// The pointed file should be a binary file containing one representative data, -// ready to be consumed by the input circle model without any modification -// NOTE If a model has multiple inputs, the binary file should have inputs concatenated in the same -// order with the input index of the circle model. -void RecordMinMax::profileRawData(const std::string &input_data_path) -{ - std::ifstream input_file(input_data_path); - if (input_file.fail()) - throw std::runtime_error("Cannot open file \"" + input_data_path + "\".\n"); - - std::string record; - uint32_t num_records = 0; - const auto input_nodes = loco::input_nodes(_module->graph()); - - // Get total input size - uint32_t total_input_size = 0; - for (auto input : input_nodes) - { - const auto *input_node = loco::must_cast(input); - checkInputDimension(input_node); - total_input_size += getTensorSize(input_node); - } - - while (getline(input_file, record)) - { - std::cout << "Recording " << num_records << "'th data" << std::endl; - - auto file_names = parse_line(record); - - // Have multiple files in one line - if (file_names.size() == input_nodes.size()) - { - std::vector> input_data; - for (uint32_t i = 0; i < file_names.size(); i++) - { - const auto file_name = file_names[i]; - const auto input_node = loco::must_cast(input_nodes[i]); - const auto input_size = getTensorSize(input_node); - - input_data.emplace_back(input_size); - - // Read data from file - readDataFromFile(file_name, input_data[i], input_size); - - // Write data from buffer to interpreter - getInterpreter()->writeInputTensor(input_node, input_data[i].data(), input_size); - } - - getInterpreter()->interpret(); - - num_records++; - } - else - { - // Must have a single file in one line (inputs are concatenated) - if (file_names.size() != 1) - throw std::runtime_error( - "Wrong number of inputs are given. Model has " + std::to_string(input_nodes.size()) + - " inputs, but list file gives " + std::to_string(file_names.size()) + " inputs."); - - // clang-format off - // Read data from file to buffer - // Assumption: For a multi-input model, the binary file should have inputs concatenated in the - // same order with the input index. - std::vector input_data(total_input_size); - readDataFromFile(record, input_data, total_input_size); - - // Write data from buffer to interpreter - uint32_t offset = 0; - for (auto input : input_nodes) - { - const auto *input_node = loco::must_cast(input); - const auto input_size = getTensorSize(input_node); - getInterpreter()->writeInputTensor(input_node, input_data.data() + offset, input_size); - - offset += input_size; - } - - getInterpreter()->interpret(); - - num_records++; - // clang-format on - } - } - - if (num_records == 0) - throw std::runtime_error("The input data file does not contain any record."); - - std::cout << "Recording finished. Number of recorded data: " << num_records << std::endl; - - _minmax_computer->update_qparam(getObserver()->minMaxData()->getMap()); -} -#endif - WholeOutput RecordMinMax::importH5Data(const std::string &input_data_path) { try @@ -520,74 +245,6 @@ void RecordMinMax::profileData() _minmax_computer->update_qparam(getObserver()->minMaxData()->getMap()); } -// TODO Remove unused code -#if 0 -void RecordMinMax::profileData(const std::string &input_data_path) -{ - try - { - dio::hdf5::HDF5Importer importer(input_data_path); - importer.importGroup("value"); - - bool is_raw_data = importer.isRawData(); - - const auto num_records = importer.numData(); - if (num_records == 0) - throw std::runtime_error("The input data file does not contain any record."); - - const auto input_nodes = loco::input_nodes(_module->graph()); - const auto num_inputs = input_nodes.size(); - - for (int32_t record_idx = 0; record_idx < num_records; record_idx++) - { - if (num_inputs != static_cast(importer.numInputs(record_idx))) - throw std::runtime_error("Wrong number of inputs."); - - std::cout << "Recording " << record_idx << "'th data" << std::endl; - - for (uint32_t input_idx = 0; input_idx < num_inputs; input_idx++) - { - const auto *input_node = loco::must_cast(input_nodes[input_idx]); - assert(input_node->index() == input_idx); - checkInputDimension(input_node); - std::vector input_data(getTensorSize(input_node)); - - if (!is_raw_data) - { - DataType dtype; - Shape shape; - importer.readTensor(record_idx, input_idx, &dtype, &shape, input_data.data(), - input_data.size()); - - // Check the type and the shape of the input data is valid - verifyTypeShape(input_node, dtype, shape); - } - else - { - // Skip type/shape check for raw data - importer.readTensor(record_idx, input_idx, input_data.data(), input_data.size()); - } - - // TODO: Input data is copied twice (file -> buffer (input_data) -> interpreter inputs) - // We can redcue the copy by directly writing data from file to interpreter inputs - getInterpreter()->writeInputTensor(input_node, input_data.data(), input_data.size()); - } - - getInterpreter()->interpret(); - } - - std::cout << "Recording finished. Number of recorded data: " << num_records << std::endl; - } - catch (const H5::Exception &e) - { - H5::Exception::printErrorStack(); - throw std::runtime_error("HDF5 error occurred."); - } - - _minmax_computer->update_qparam(getObserver()->minMaxData()->getMap()); -} -#endif - void RecordMinMax::profileDataInParallel(const std::string &input_data_path) { LOGGER(l); @@ -674,80 +331,6 @@ void RecordMinMax::profileDataInParallel(const std::string &input_data_path) _minmax_computer->update_qparam(main_min_max_map.getMap()); } -// TODO Remove unused code -#if 0 -void RecordMinMax::profileDataWithRandomInputs(void) -{ - // We use three randomly-generated records - const uint32_t num_records = 3; - - const auto input_nodes = loco::input_nodes(_module->graph()); - const auto num_inputs = input_nodes.size(); - - std::random_device rd; - std::mt19937 gen(rd()); - std::uniform_real_distribution<> dist(-5, 5); - - for (uint32_t record_idx = 0; record_idx < num_records; record_idx++) - { - std::cout << "Recording " << record_idx << "'th data" << std::endl; - - for (uint32_t input_idx = 0; input_idx < num_inputs; input_idx++) - { - const auto *input_node = loco::must_cast(input_nodes[input_idx]); - assert(input_node->index() == input_idx); - checkInputDimension(input_node); - - const auto num_elements = numElements(input_node); - - // TODO Support more input data types - assert(input_node->dtype() == loco::DataType::FLOAT32 || - input_node->dtype() == loco::DataType::BOOL || - input_node->dtype() == loco::DataType::S32 || - input_node->dtype() == loco::DataType::S64); - - if (input_node->dtype() == DataType::FLOAT32) - { - std::vector input_data(num_elements); - - // Write random data - for (auto &iter : input_data) - iter = static_cast(dist(gen)); - - // TODO: Input data is copied twice (file -> buffer (input_data) -> interpreter inputs) - // We can redcue the copy by directly writing data from file to interpreter inputs - getInterpreter()->writeInputTensor(input_node, input_data.data(), - input_data.size() * sizeof(float)); - } - else if (input_node->dtype() == DataType::BOOL) - { - auto input_data = genRandomBoolData(gen, num_elements); - getInterpreter()->writeInputTensor(input_node, input_data.data(), - input_data.size() * sizeof(uint8_t)); - } - else if (input_node->dtype() == DataType::S32) - { - auto input_data = genRandomIntData(gen, num_elements, 0, 100); - getInterpreter()->writeInputTensor(input_node, input_data.data(), - input_data.size() * sizeof(int32_t)); - } - else if (input_node->dtype() == DataType::S64) - { - auto input_data = genRandomIntData(gen, num_elements, 0, 100); - getInterpreter()->writeInputTensor(input_node, input_data.data(), - input_data.size() * sizeof(int64_t)); - } - } - - getInterpreter()->interpret(); - } - - std::cout << "Recording finished. Number of recorded data: " << num_records << std::endl; - - _minmax_computer->update_qparam(getObserver()->minMaxData()->getMap()); -} -#endif - void RecordMinMax::saveModel(const std::string &output_model_path) { // Export to output Circle file