From 5b6c7d3db96aeee46ba026094226301407ead612 Mon Sep 17 00:00:00 2001 From: Daniel Parker Date: Wed, 18 Dec 2024 12:05:54 -0500 Subject: [PATCH 01/79] remove csv_serializer.hpp --- include/jsoncons_ext/csv/csv_serializer.hpp | 12 ------------ 1 file changed, 12 deletions(-) delete mode 100644 include/jsoncons_ext/csv/csv_serializer.hpp diff --git a/include/jsoncons_ext/csv/csv_serializer.hpp b/include/jsoncons_ext/csv/csv_serializer.hpp deleted file mode 100644 index d8028ec64f..0000000000 --- a/include/jsoncons_ext/csv/csv_serializer.hpp +++ /dev/null @@ -1,12 +0,0 @@ -// Copyright 2013-2024 Daniel Parker -// Distributed under the Boost license, Version 1.0. -// (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) - -// See https://github.com/danielaparker/jsoncons for latest version - -#ifndef JSONCONS_CSV_CSV_SERIALIZER_HPP -#define JSONCONS_CSV_CSV_SERIALIZER_HPP - -#include - -#endif From 3088634be30f280f0f36ead3fca028a3c81ea8c3 Mon Sep 17 00:00:00 2001 From: Daniel Parker Date: Thu, 19 Dec 2024 19:56:47 -0500 Subject: [PATCH 02/79] generalized json to csv --- include/jsoncons_ext/csv/csv_encoder.hpp | 111 ++++++++++++++++------- test/csv/src/csv_json_tests.cpp | 56 ++++++++++++ 2 files changed, 132 insertions(+), 35 deletions(-) create mode 100644 test/csv/src/csv_json_tests.cpp diff --git a/include/jsoncons_ext/csv/csv_encoder.hpp b/include/jsoncons_ext/csv/csv_encoder.hpp index 2aec0bbeb3..e75cf28a99 100644 --- a/include/jsoncons_ext/csv/csv_encoder.hpp +++ b/include/jsoncons_ext/csv/csv_encoder.hpp @@ -70,9 +70,10 @@ class basic_csv_encoder final : public basic_json_visitor { stack_item_kind item_kind_; std::size_t count_; + std::string pathname_; stack_item(stack_item_kind item_kind) noexcept - : item_kind_(item_kind), count_(0) + : item_kind_(item_kind), pathname_{}, count_(0) { } @@ -93,10 +94,10 @@ class basic_csv_encoder final : public basic_json_visitor std::vector stack_; jsoncons::detail::write_double fp_; - std::vector strings_buffer_; + std::vector strings_buffer_; std::unordered_map,std::equal_to,string_string_allocator_type> buffered_line_; - string_type name_; + std::size_t column_index_; std::vector row_counts_; @@ -139,7 +140,6 @@ class basic_csv_encoder final : public basic_json_visitor stack_.clear(); strings_buffer_.clear(); buffered_line_.clear(); - name_.clear(); column_index_ = 0; row_counts_.clear(); } @@ -192,6 +192,10 @@ class basic_csv_encoder final : public basic_json_visitor case stack_item_kind::row_mapping: stack_.emplace_back(stack_item_kind::object); return true; + case stack_item_kind::object: + //std::cout << "visit_begin_object: " << stack_.back().pathname_ << "\n"; + stack_.emplace_back(stack_item_kind::object); + return true; default: // error ec = csv_errc::source_error; return false; @@ -205,34 +209,37 @@ class basic_csv_encoder final : public basic_json_visitor switch (stack_.back().item_kind_) { case stack_item_kind::object: - if (stack_[0].count_ == 0) + if (stack_[stack_.size()-2].item_kind_ == stack_item_kind::row_mapping) { + if (stack_[0].count_ == 0) + { + for (std::size_t i = 0; i < strings_buffer_.size(); ++i) + { + if (i > 0) + { + sink_.push_back(options_.field_delimiter()); + } + sink_.append(strings_buffer_[i].data(), + strings_buffer_[i].length()); + } + sink_.append(options_.line_delimiter().data(), + options_.line_delimiter().length()); + } for (std::size_t i = 0; i < strings_buffer_.size(); ++i) { if (i > 0) { sink_.push_back(options_.field_delimiter()); } - sink_.append(strings_buffer_[i].data(), - strings_buffer_[i].length()); - } - sink_.append(options_.line_delimiter().data(), - options_.line_delimiter().length()); - } - for (std::size_t i = 0; i < strings_buffer_.size(); ++i) - { - if (i > 0) - { - sink_.push_back(options_.field_delimiter()); - } - auto it = buffered_line_.find(strings_buffer_[i]); - if (it != buffered_line_.end()) - { - sink_.append(it->second.data(),it->second.length()); - it->second.clear(); + auto it = buffered_line_.find(strings_buffer_[i]); + if (it != buffered_line_.end()) + { + sink_.append(it->second.data(),it->second.length()); + it->second.clear(); + } } + sink_.append(options_.line_delimiter().data(), options_.line_delimiter().length()); } - sink_.append(options_.line_delimiter().data(), options_.line_delimiter().length()); break; case stack_item_kind::column_mapping: { @@ -345,12 +352,16 @@ class basic_csv_encoder final : public basic_json_visitor { case stack_item_kind::object: { - name_ = string_type(name); - buffered_line_[string_type(name)] = std::basic_string(); - if (stack_[0].count_ == 0 && options_.column_names().size() == 0) - { - strings_buffer_.emplace_back(name); - } + stack_.back().pathname_ = stack_[stack_.size()-2].pathname_; + stack_.back().pathname_.push_back('/'); + stack_.back().pathname_.append(std::string(name)); + //std::cout << "visit_key: " << stack_.back().pathname_ << "\n"; + + //buffered_line_[stack_.back().pathname_] = std::basic_string(); + //if (stack_[0].count_ == 0 && options_.column_names().size() == 0) + //{ + // strings_buffer_.emplace_back(stack_.back().pathname_); + //} break; } case stack_item_kind::column_mapping: @@ -380,7 +391,12 @@ class basic_csv_encoder final : public basic_json_visitor case stack_item_kind::object: case stack_item_kind::object_multi_valued_field: { - auto it = buffered_line_.find(name_); + if (stack_[0].count_ == 0 && options_.column_names().size() == 0) + { + strings_buffer_.emplace_back(stack_.back().pathname_); + } + buffered_line_[stack_.back().pathname_] = std::basic_string(); + auto it = buffered_line_.find(stack_.back().pathname_); if (it != buffered_line_.end()) { std::basic_string s; @@ -429,7 +445,12 @@ class basic_csv_encoder final : public basic_json_visitor case stack_item_kind::object: case stack_item_kind::object_multi_valued_field: { - auto it = buffered_line_.find(name_); + if (stack_[0].count_ == 0 && options_.column_names().size() == 0) + { + strings_buffer_.emplace_back(stack_.back().pathname_); + } + buffered_line_[stack_.back().pathname_] = std::basic_string(); + auto it = buffered_line_.find(stack_.back().pathname_); if (it != buffered_line_.end()) { std::basic_string s; @@ -534,7 +555,12 @@ class basic_csv_encoder final : public basic_json_visitor case stack_item_kind::object: case stack_item_kind::object_multi_valued_field: { - auto it = buffered_line_.find(name_); + if (stack_[0].count_ == 0 && options_.column_names().size() == 0) + { + strings_buffer_.emplace_back(stack_.back().pathname_); + } + buffered_line_[stack_.back().pathname_] = std::basic_string(); + auto it = buffered_line_.find(stack_.back().pathname_); if (it != buffered_line_.end()) { std::basic_string s; @@ -586,7 +612,12 @@ class basic_csv_encoder final : public basic_json_visitor case stack_item_kind::object: case stack_item_kind::object_multi_valued_field: { - auto it = buffered_line_.find(name_); + if (stack_[0].count_ == 0 && options_.column_names().size() == 0) + { + strings_buffer_.emplace_back(stack_.back().pathname_); + } + buffered_line_[stack_.back().pathname_] = std::basic_string(); + auto it = buffered_line_.find(stack_.back().pathname_); if (it != buffered_line_.end()) { std::basic_string s; @@ -638,7 +669,12 @@ class basic_csv_encoder final : public basic_json_visitor case stack_item_kind::object: case stack_item_kind::object_multi_valued_field: { - auto it = buffered_line_.find(name_); + if (stack_[0].count_ == 0 && options_.column_names().size() == 0) + { + strings_buffer_.emplace_back(stack_.back().pathname_); + } + buffered_line_[stack_.back().pathname_] = std::basic_string(); + auto it = buffered_line_.find(stack_.back().pathname_); if (it != buffered_line_.end()) { std::basic_string s; @@ -687,7 +723,12 @@ class basic_csv_encoder final : public basic_json_visitor case stack_item_kind::object: case stack_item_kind::object_multi_valued_field: { - auto it = buffered_line_.find(name_); + if (stack_[0].count_ == 0 && options_.column_names().size() == 0) + { + strings_buffer_.emplace_back(stack_.back().pathname_); + } + buffered_line_[stack_.back().pathname_] = std::basic_string(); + auto it = buffered_line_.find(stack_.back().pathname_); if (it != buffered_line_.end()) { std::basic_string s; diff --git a/test/csv/src/csv_json_tests.cpp b/test/csv/src/csv_json_tests.cpp new file mode 100644 index 0000000000..71e72d907b --- /dev/null +++ b/test/csv/src/csv_json_tests.cpp @@ -0,0 +1,56 @@ +// Copyright 2013-2024 Daniel Parker +// Distributed under Boost license + +#include +#include +#include + +namespace csv = jsoncons::csv; + +TEST_CASE("test csv to json") +{ + SECTION("test 1") + { + std::string jtext = R"( +[ + { + "text": "Chicago Reader", + "float": 1.0, + "datetime": "1971-01-01T04:14:00", + "boolean": true, + "nested": { + "time": "04:14:00", + "nested": { + "date": "1971-01-01", + "integer": 40 + } + } + }, + { + "text": "Chicago Sun-Times", + "float": 1.27, + "datetime": "1948-01-01T14:57:13", + "boolean": true, + "nested": { + "time": "14:57:13", + "nested": { + "date": "1948-01-01", + "integer": 63 + } + } + } +] + )"; + + auto j = jsoncons::json::parse(jtext); + std::cout << pretty_print(j) << "\n"; + + std::string buf; + csv::csv_string_encoder encoder(buf); + j.dump(encoder); + + std::cout << buf << "\n"; + } +} + + From 34bf5f57aa4b4867ac05324e4ec6cff43324d81e Mon Sep 17 00:00:00 2001 From: Daniel Parker Date: Fri, 20 Dec 2024 03:59:23 -0500 Subject: [PATCH 03/79] generalized json to csv --- include/jsoncons/basic_json.hpp | 1 - include/jsoncons_ext/csv/csv_encoder.hpp | 148 +++++++++++------------ 2 files changed, 74 insertions(+), 75 deletions(-) diff --git a/include/jsoncons/basic_json.hpp b/include/jsoncons/basic_json.hpp index 211e4d4a03..76cc7dd8ff 100644 --- a/include/jsoncons/basic_json.hpp +++ b/include/jsoncons/basic_json.hpp @@ -397,7 +397,6 @@ namespace jsoncons { using key_type = typename policy_type::template member_key; - using reference = basic_json&; using const_reference = const basic_json&; using pointer = basic_json*; diff --git a/include/jsoncons_ext/csv/csv_encoder.hpp b/include/jsoncons_ext/csv/csv_encoder.hpp index e75cf28a99..638ada0711 100644 --- a/include/jsoncons_ext/csv/csv_encoder.hpp +++ b/include/jsoncons_ext/csv/csv_encoder.hpp @@ -95,8 +95,8 @@ class basic_csv_encoder final : public basic_json_visitor std::vector stack_; jsoncons::detail::write_double fp_; - std::vector strings_buffer_; - std::unordered_map,std::equal_to,string_string_allocator_type> buffered_line_; + std::vector column_names_; + std::unordered_map,std::equal_to,string_string_allocator_type> cname_value_map_; std::size_t column_index_; std::vector row_counts_; @@ -121,7 +121,7 @@ class basic_csv_encoder final : public basic_json_visitor fp_(options.float_format(), options.precision()), column_index_(0) { - jsoncons::csv::detail::parse_column_names(options.column_names(), strings_buffer_); + jsoncons::csv::detail::parse_column_names(options.column_names(), column_names_); } ~basic_csv_encoder() noexcept @@ -138,8 +138,8 @@ class basic_csv_encoder final : public basic_json_visitor void reset() { stack_.clear(); - strings_buffer_.clear(); - buffered_line_.clear(); + column_names_.clear(); + cname_value_map_.clear(); column_index_ = 0; row_counts_.clear(); } @@ -213,26 +213,26 @@ class basic_csv_encoder final : public basic_json_visitor { if (stack_[0].count_ == 0) { - for (std::size_t i = 0; i < strings_buffer_.size(); ++i) + for (std::size_t i = 0; i < column_names_.size(); ++i) { if (i > 0) { sink_.push_back(options_.field_delimiter()); } - sink_.append(strings_buffer_[i].data(), - strings_buffer_[i].length()); + sink_.append(column_names_[i].data(), + column_names_[i].length()); } sink_.append(options_.line_delimiter().data(), options_.line_delimiter().length()); } - for (std::size_t i = 0; i < strings_buffer_.size(); ++i) + for (std::size_t i = 0; i < column_names_.size(); ++i) { if (i > 0) { sink_.push_back(options_.field_delimiter()); } - auto it = buffered_line_.find(strings_buffer_[i]); - if (it != buffered_line_.end()) + auto it = cname_value_map_.find(column_names_[i]); + if (it != cname_value_map_.end()) { sink_.append(it->second.data(),it->second.length()); it->second.clear(); @@ -243,7 +243,7 @@ class basic_csv_encoder final : public basic_json_visitor break; case stack_item_kind::column_mapping: { - for (const auto& item : strings_buffer_) + for (const auto& item : column_names_) { sink_.append(item.data(), item.size()); sink_.append(options_.line_delimiter().data(), options_.line_delimiter().length()); @@ -274,15 +274,15 @@ class basic_csv_encoder final : public basic_json_visitor stack_.emplace_back(stack_item_kind::row); if (stack_[0].count_ == 0) { - for (std::size_t i = 0; i < strings_buffer_.size(); ++i) + for (std::size_t i = 0; i < column_names_.size(); ++i) { if (i > 0) { sink_.push_back(options_.field_delimiter()); } - sink_.append(strings_buffer_[i].data(),strings_buffer_[i].length()); + sink_.append(column_names_[i].data(),column_names_[i].length()); } - if (strings_buffer_.size() > 0) + if (column_names_.size() > 0) { sink_.append(options_.line_delimiter().data(), options_.line_delimiter().length()); @@ -295,18 +295,18 @@ class basic_csv_encoder final : public basic_json_visitor case stack_item_kind::column_mapping: stack_.emplace_back(stack_item_kind::column); row_counts_.push_back(1); - if (strings_buffer_.size() <= row_counts_.back()) + if (column_names_.size() <= row_counts_.back()) { - strings_buffer_.emplace_back(); + column_names_.emplace_back(); } return true; case stack_item_kind::column: { - if (strings_buffer_.size() <= row_counts_.back()) + if (column_names_.size() <= row_counts_.back()) { - strings_buffer_.emplace_back(); + column_names_.emplace_back(); } - jsoncons::string_sink> bo(strings_buffer_[row_counts_.back()]); + jsoncons::string_sink> bo(column_names_[row_counts_.back()]); begin_value(bo); stack_.emplace_back(stack_item_kind::column_multi_valued_field); return true; @@ -357,23 +357,23 @@ class basic_csv_encoder final : public basic_json_visitor stack_.back().pathname_.append(std::string(name)); //std::cout << "visit_key: " << stack_.back().pathname_ << "\n"; - //buffered_line_[stack_.back().pathname_] = std::basic_string(); + //cname_value_map_[stack_.back().pathname_] = std::basic_string(); //if (stack_[0].count_ == 0 && options_.column_names().size() == 0) //{ - // strings_buffer_.emplace_back(stack_.back().pathname_); + // column_names_.emplace_back(stack_.back().pathname_); //} break; } case stack_item_kind::column_mapping: { - if (strings_buffer_.empty()) + if (column_names_.empty()) { - strings_buffer_.emplace_back(name); + column_names_.emplace_back(name); } else { - strings_buffer_[0].push_back(options_.field_delimiter()); - strings_buffer_[0].append(string_type(name)); + column_names_[0].push_back(options_.field_delimiter()); + column_names_[0].append(string_type(name)); } break; } @@ -393,11 +393,11 @@ class basic_csv_encoder final : public basic_json_visitor { if (stack_[0].count_ == 0 && options_.column_names().size() == 0) { - strings_buffer_.emplace_back(stack_.back().pathname_); + column_names_.emplace_back(stack_.back().pathname_); } - buffered_line_[stack_.back().pathname_] = std::basic_string(); - auto it = buffered_line_.find(stack_.back().pathname_); - if (it != buffered_line_.end()) + cname_value_map_[stack_.back().pathname_] = std::basic_string(); + auto it = cname_value_map_.find(stack_.back().pathname_); + if (it != cname_value_map_.end()) { std::basic_string s; jsoncons::string_sink> bo(s); @@ -417,17 +417,17 @@ class basic_csv_encoder final : public basic_json_visitor break; case stack_item_kind::column: { - if (strings_buffer_.size() <= row_counts_.back()) + if (column_names_.size() <= row_counts_.back()) { - strings_buffer_.emplace_back(); + column_names_.emplace_back(); } - jsoncons::string_sink> bo(strings_buffer_[row_counts_.back()]); + jsoncons::string_sink> bo(column_names_[row_counts_.back()]); write_null_value(bo); break; } case stack_item_kind::column_multi_valued_field: { - jsoncons::string_sink> bo(strings_buffer_[row_counts_.back()]); + jsoncons::string_sink> bo(column_names_[row_counts_.back()]); write_null_value(bo); break; } @@ -447,11 +447,11 @@ class basic_csv_encoder final : public basic_json_visitor { if (stack_[0].count_ == 0 && options_.column_names().size() == 0) { - strings_buffer_.emplace_back(stack_.back().pathname_); + column_names_.emplace_back(stack_.back().pathname_); } - buffered_line_[stack_.back().pathname_] = std::basic_string(); - auto it = buffered_line_.find(stack_.back().pathname_); - if (it != buffered_line_.end()) + cname_value_map_[stack_.back().pathname_] = std::basic_string(); + auto it = cname_value_map_.find(stack_.back().pathname_); + if (it != cname_value_map_.end()) { std::basic_string s; jsoncons::string_sink> bo(s); @@ -471,17 +471,17 @@ class basic_csv_encoder final : public basic_json_visitor break; case stack_item_kind::column: { - if (strings_buffer_.size() <= row_counts_.back()) + if (column_names_.size() <= row_counts_.back()) { - strings_buffer_.emplace_back(); + column_names_.emplace_back(); } - jsoncons::string_sink> bo(strings_buffer_[row_counts_.back()]); + jsoncons::string_sink> bo(column_names_[row_counts_.back()]); write_string_value(sv,bo); break; } case stack_item_kind::column_multi_valued_field: { - jsoncons::string_sink> bo(strings_buffer_[row_counts_.back()]); + jsoncons::string_sink> bo(column_names_[row_counts_.back()]); write_string_value(sv,bo); break; } @@ -557,11 +557,11 @@ class basic_csv_encoder final : public basic_json_visitor { if (stack_[0].count_ == 0 && options_.column_names().size() == 0) { - strings_buffer_.emplace_back(stack_.back().pathname_); + column_names_.emplace_back(stack_.back().pathname_); } - buffered_line_[stack_.back().pathname_] = std::basic_string(); - auto it = buffered_line_.find(stack_.back().pathname_); - if (it != buffered_line_.end()) + cname_value_map_[stack_.back().pathname_] = std::basic_string(); + auto it = cname_value_map_.find(stack_.back().pathname_); + if (it != cname_value_map_.end()) { std::basic_string s; jsoncons::string_sink> bo(s); @@ -581,17 +581,17 @@ class basic_csv_encoder final : public basic_json_visitor break; case stack_item_kind::column: { - if (strings_buffer_.size() <= row_counts_.back()) + if (column_names_.size() <= row_counts_.back()) { - strings_buffer_.emplace_back(); + column_names_.emplace_back(); } - jsoncons::string_sink> bo(strings_buffer_[row_counts_.back()]); + jsoncons::string_sink> bo(column_names_[row_counts_.back()]); write_double_value(val, context, bo, ec); break; } case stack_item_kind::column_multi_valued_field: { - jsoncons::string_sink> bo(strings_buffer_[row_counts_.back()]); + jsoncons::string_sink> bo(column_names_[row_counts_.back()]); write_double_value(val, context, bo, ec); break; } @@ -614,11 +614,11 @@ class basic_csv_encoder final : public basic_json_visitor { if (stack_[0].count_ == 0 && options_.column_names().size() == 0) { - strings_buffer_.emplace_back(stack_.back().pathname_); + column_names_.emplace_back(stack_.back().pathname_); } - buffered_line_[stack_.back().pathname_] = std::basic_string(); - auto it = buffered_line_.find(stack_.back().pathname_); - if (it != buffered_line_.end()) + cname_value_map_[stack_.back().pathname_] = std::basic_string(); + auto it = cname_value_map_.find(stack_.back().pathname_); + if (it != cname_value_map_.end()) { std::basic_string s; jsoncons::string_sink> bo(s); @@ -638,17 +638,17 @@ class basic_csv_encoder final : public basic_json_visitor break; case stack_item_kind::column: { - if (strings_buffer_.size() <= row_counts_.back()) + if (column_names_.size() <= row_counts_.back()) { - strings_buffer_.emplace_back(); + column_names_.emplace_back(); } - jsoncons::string_sink> bo(strings_buffer_[row_counts_.back()]); + jsoncons::string_sink> bo(column_names_[row_counts_.back()]); write_int64_value(val, bo); break; } case stack_item_kind::column_multi_valued_field: { - jsoncons::string_sink> bo(strings_buffer_[row_counts_.back()]); + jsoncons::string_sink> bo(column_names_[row_counts_.back()]); write_int64_value(val, bo); break; } @@ -671,11 +671,11 @@ class basic_csv_encoder final : public basic_json_visitor { if (stack_[0].count_ == 0 && options_.column_names().size() == 0) { - strings_buffer_.emplace_back(stack_.back().pathname_); + column_names_.emplace_back(stack_.back().pathname_); } - buffered_line_[stack_.back().pathname_] = std::basic_string(); - auto it = buffered_line_.find(stack_.back().pathname_); - if (it != buffered_line_.end()) + cname_value_map_[stack_.back().pathname_] = std::basic_string(); + auto it = cname_value_map_.find(stack_.back().pathname_); + if (it != cname_value_map_.end()) { std::basic_string s; jsoncons::string_sink> bo(s); @@ -695,17 +695,17 @@ class basic_csv_encoder final : public basic_json_visitor break; case stack_item_kind::column: { - if (strings_buffer_.size() <= row_counts_.back()) + if (column_names_.size() <= row_counts_.back()) { - strings_buffer_.emplace_back(); + column_names_.emplace_back(); } - jsoncons::string_sink> bo(strings_buffer_[row_counts_.back()]); + jsoncons::string_sink> bo(column_names_[row_counts_.back()]); write_uint64_value(val, bo); break; } case stack_item_kind::column_multi_valued_field: { - jsoncons::string_sink> bo(strings_buffer_[row_counts_.back()]); + jsoncons::string_sink> bo(column_names_[row_counts_.back()]); write_uint64_value(val, bo); break; } @@ -725,11 +725,11 @@ class basic_csv_encoder final : public basic_json_visitor { if (stack_[0].count_ == 0 && options_.column_names().size() == 0) { - strings_buffer_.emplace_back(stack_.back().pathname_); + column_names_.emplace_back(stack_.back().pathname_); } - buffered_line_[stack_.back().pathname_] = std::basic_string(); - auto it = buffered_line_.find(stack_.back().pathname_); - if (it != buffered_line_.end()) + cname_value_map_[stack_.back().pathname_] = std::basic_string(); + auto it = cname_value_map_.find(stack_.back().pathname_); + if (it != cname_value_map_.end()) { std::basic_string s; jsoncons::string_sink> bo(s); @@ -749,17 +749,17 @@ class basic_csv_encoder final : public basic_json_visitor break; case stack_item_kind::column: { - if (strings_buffer_.size() <= row_counts_.back()) + if (column_names_.size() <= row_counts_.back()) { - strings_buffer_.emplace_back(); + column_names_.emplace_back(); } - jsoncons::string_sink> bo(strings_buffer_[row_counts_.back()]); + jsoncons::string_sink> bo(column_names_[row_counts_.back()]); write_bool_value(val, bo); break; } case stack_item_kind::column_multi_valued_field: { - jsoncons::string_sink> bo(strings_buffer_[row_counts_.back()]); + jsoncons::string_sink> bo(column_names_[row_counts_.back()]); write_bool_value(val, bo); break; } From ec8e9d6261c6946b44d36d8e87404d37c4e314a5 Mon Sep 17 00:00:00 2001 From: Daniel Parker Date: Thu, 2 Jan 2025 14:42:16 -0500 Subject: [PATCH 04/79] generalized csv --- include/jsoncons_ext/csv/csv_encoder.hpp | 44 +++++++++++------------- 1 file changed, 20 insertions(+), 24 deletions(-) diff --git a/include/jsoncons_ext/csv/csv_encoder.hpp b/include/jsoncons_ext/csv/csv_encoder.hpp index b99471f1df..638ada0711 100644 --- a/include/jsoncons_ext/csv/csv_encoder.hpp +++ b/include/jsoncons_ext/csv/csv_encoder.hpp @@ -4,23 +4,22 @@ // See https://github.com/danielaparker/jsoncons for latest version -#ifndef JSONCONS_EXT_CSV_CSV_ENCODER_HPP -#define JSONCONS_EXT_CSV_CSV_ENCODER_HPP +#ifndef JSONCONS_CSV_CSV_ENCODER_HPP +#define JSONCONS_CSV_CSV_ENCODER_HPP #include // std::array -#include // std::numeric_limits -#include // std::allocator -#include #include -#include // std::unordered_map -#include // std::move #include - -#include +#include +#include // std::move +#include // std::unordered_map +#include // std::allocator +#include // std::numeric_limits #include #include -#include +#include #include +#include namespace jsoncons { namespace csv { @@ -71,13 +70,12 @@ class basic_csv_encoder final : public basic_json_visitor { stack_item_kind item_kind_; std::size_t count_; + std::string pathname_; stack_item(stack_item_kind item_kind) noexcept - : item_kind_(item_kind), count_(0) + : item_kind_(item_kind), pathname_{}, count_(0) { } - - ~stack_item() = default; bool is_object() const { @@ -97,16 +95,16 @@ class basic_csv_encoder final : public basic_json_visitor std::vector stack_; jsoncons::detail::write_double fp_; - std::unordered_map,std::equal_to,string_string_allocator_type> buffered_line_; - string_type name_; + std::vector column_names_; + std::unordered_map,std::equal_to,string_string_allocator_type> cname_value_map_; + std::size_t column_index_; std::vector row_counts_; -public: // Noncopyable and nonmoveable basic_csv_encoder(const basic_csv_encoder&) = delete; - basic_csv_encoder(basic_csv_encoder&&) = delete; - + basic_csv_encoder& operator=(const basic_csv_encoder&) = delete; +public: basic_csv_encoder(Sink&& sink, const Allocator& alloc = Allocator()) : basic_csv_encoder(std::forward(sink), basic_csv_encode_options(), alloc) @@ -120,7 +118,8 @@ class basic_csv_encoder final : public basic_json_visitor options_(options), alloc_(alloc), stack_(), - fp_(options.float_format(), options.precision()) + fp_(options.float_format(), options.precision()), + column_index_(0) { jsoncons::csv::detail::parse_column_names(options.column_names(), column_names_); } @@ -135,8 +134,6 @@ class basic_csv_encoder final : public basic_json_visitor { } } - basic_csv_encoder& operator=(const basic_csv_encoder&) = delete; - basic_csv_encoder& operator=(basic_csv_encoder&&) = delete; void reset() { @@ -981,7 +978,6 @@ using csv_string_encoder = basic_csv_encoder; using wcsv_string_encoder = basic_csv_encoder>; -} // namespace jsonpath -} // namespace jsoncons +}} -#endif // JSONCONS_EXT_CSV_CSV_ENCODER_HPP +#endif From 838d5cf436f633e420219185d3c8d0ffdafcd131 Mon Sep 17 00:00:00 2001 From: Daniel Parker Date: Mon, 6 Jan 2025 12:49:03 -0500 Subject: [PATCH 05/79] csv_encoder --- include/jsoncons_ext/csv/csv_encoder.hpp | 2 + test/csv/src/csv_encoder_tests.cpp | 107 +++++++++++++++++++++++ test/csv/src/csv_json_tests.cpp | 56 ------------ 3 files changed, 109 insertions(+), 56 deletions(-) create mode 100644 test/csv/src/csv_encoder_tests.cpp delete mode 100644 test/csv/src/csv_json_tests.cpp diff --git a/include/jsoncons_ext/csv/csv_encoder.hpp b/include/jsoncons_ext/csv/csv_encoder.hpp index 638ada0711..7350d9f54c 100644 --- a/include/jsoncons_ext/csv/csv_encoder.hpp +++ b/include/jsoncons_ext/csv/csv_encoder.hpp @@ -271,6 +271,7 @@ class basic_csv_encoder final : public basic_json_visitor switch (stack_.back().item_kind_) { case stack_item_kind::row_mapping: + std::cout << "begin_array: row_mapping"; stack_.emplace_back(stack_item_kind::row); if (stack_[0].count_ == 0) { @@ -327,6 +328,7 @@ class basic_csv_encoder final : public basic_json_visitor switch (stack_.back().item_kind_) { case stack_item_kind::row: + std::cout << "end_array: row"; sink_.append(options_.line_delimiter().data(), options_.line_delimiter().length()); break; diff --git a/test/csv/src/csv_encoder_tests.cpp b/test/csv/src/csv_encoder_tests.cpp new file mode 100644 index 0000000000..d97af10e36 --- /dev/null +++ b/test/csv/src/csv_encoder_tests.cpp @@ -0,0 +1,107 @@ +// Copyright 2013-2024 Daniel Parker +// Distributed under Boost license + +#include +#include +#include + +namespace csv = jsoncons::csv; + +TEST_CASE("test csv to json") +{ + /*SECTION("array of objects to csv") + { + std::string expected = R"(/boolean,/datetime,/float,/nested/nested/date,/nested/nested/integer,/nested/time,/text +true,1971-01-01T04:14:00,1.0,1971-01-01,40,04:14:00,Chicago Reader +true,1948-01-01T14:57:13,1.27,1948-01-01,63,14:57:13,Chicago Sun-Times +)"; + + std::string jtext = R"( +[ + { + "text": "Chicago Reader", + "float": 1.0, + "datetime": "1971-01-01T04:14:00", + "boolean": true, + "nested": { + "time": "04:14:00", + "nested": { + "date": "1971-01-01", + "integer": 40 + } + } + }, + { + "text": "Chicago Sun-Times", + "float": 1.27, + "datetime": "1948-01-01T14:57:13", + "boolean": true, + "nested": { + "time": "14:57:13", + "nested": { + "date": "1948-01-01", + "integer": 63 + } + } + } +] + )"; + + auto j = jsoncons::json::parse(jtext); + std::cout << pretty_print(j) << "\n"; + + std::string buf; + csv::csv_string_encoder encoder(buf); + j.dump(encoder); + + CHECK(expected == buf); + }*/ + + SECTION("array of arrays to csv") + { + std::string expected = R"()"; + + std::string jtext = R"( +[ + [ + "Chicago Reader", + 1.0, + "1971-01-01T04:14:00", + true, + [ + "04:14:00", + [ + "1971-01-01", + 40 + ] + ] + ], + [ + "Chicago Sun-Times", + 1.27, + "1948-01-01T14:57:13", + true, + [ + "14:57:13", + [ + "1948-01-01", + 63 + ] + ] + ] +] + )"; + + auto j = jsoncons::json::parse(jtext); + std::cout << pretty_print(j) << "\n"; + + std::string buf; + csv::csv_string_encoder encoder(buf); + j.dump(encoder); + + std::cout << buf << "\n"; + + //CHECK(expected == buf); + } +} + diff --git a/test/csv/src/csv_json_tests.cpp b/test/csv/src/csv_json_tests.cpp deleted file mode 100644 index 71e72d907b..0000000000 --- a/test/csv/src/csv_json_tests.cpp +++ /dev/null @@ -1,56 +0,0 @@ -// Copyright 2013-2024 Daniel Parker -// Distributed under Boost license - -#include -#include -#include - -namespace csv = jsoncons::csv; - -TEST_CASE("test csv to json") -{ - SECTION("test 1") - { - std::string jtext = R"( -[ - { - "text": "Chicago Reader", - "float": 1.0, - "datetime": "1971-01-01T04:14:00", - "boolean": true, - "nested": { - "time": "04:14:00", - "nested": { - "date": "1971-01-01", - "integer": 40 - } - } - }, - { - "text": "Chicago Sun-Times", - "float": 1.27, - "datetime": "1948-01-01T14:57:13", - "boolean": true, - "nested": { - "time": "14:57:13", - "nested": { - "date": "1948-01-01", - "integer": 63 - } - } - } -] - )"; - - auto j = jsoncons::json::parse(jtext); - std::cout << pretty_print(j) << "\n"; - - std::string buf; - csv::csv_string_encoder encoder(buf); - j.dump(encoder); - - std::cout << buf << "\n"; - } -} - - From 59f3329a81b975e43418207b40179314c38c3e02 Mon Sep 17 00:00:00 2001 From: Daniel Parker Date: Mon, 6 Jan 2025 14:40:33 -0500 Subject: [PATCH 06/79] csv_encoder --- include/jsoncons_ext/csv/csv_encoder.hpp | 169 ++++++++++++++++++----- 1 file changed, 136 insertions(+), 33 deletions(-) diff --git a/include/jsoncons_ext/csv/csv_encoder.hpp b/include/jsoncons_ext/csv/csv_encoder.hpp index 7350d9f54c..7466960232 100644 --- a/include/jsoncons_ext/csv/csv_encoder.hpp +++ b/include/jsoncons_ext/csv/csv_encoder.hpp @@ -193,7 +193,6 @@ class basic_csv_encoder final : public basic_json_visitor stack_.emplace_back(stack_item_kind::object); return true; case stack_item_kind::object: - //std::cout << "visit_begin_object: " << stack_.back().pathname_ << "\n"; stack_.emplace_back(stack_item_kind::object); return true; default: // error @@ -268,27 +267,11 @@ class basic_csv_encoder final : public basic_json_visitor stack_.emplace_back(stack_item_kind::row_mapping); return true; } + switch (stack_.back().item_kind_) { case stack_item_kind::row_mapping: - std::cout << "begin_array: row_mapping"; stack_.emplace_back(stack_item_kind::row); - if (stack_[0].count_ == 0) - { - for (std::size_t i = 0; i < column_names_.size(); ++i) - { - if (i > 0) - { - sink_.push_back(options_.field_delimiter()); - } - sink_.append(column_names_[i].data(),column_names_[i].length()); - } - if (column_names_.size() > 0) - { - sink_.append(options_.line_delimiter().data(), - options_.line_delimiter().length()); - } - } return true; case stack_item_kind::object: stack_.emplace_back(stack_item_kind::object_multi_valued_field); @@ -314,7 +297,8 @@ class basic_csv_encoder final : public basic_json_visitor } case stack_item_kind::row: begin_value(sink_); - stack_.emplace_back(stack_item_kind::row_multi_valued_field); + //stack_.emplace_back(stack_item_kind::row_multi_valued_field); + stack_.emplace_back(stack_item_kind::row); return true; default: // error ec = csv_errc::source_error; @@ -328,9 +312,38 @@ class basic_csv_encoder final : public basic_json_visitor switch (stack_.back().item_kind_) { case stack_item_kind::row: - std::cout << "end_array: row"; - sink_.append(options_.line_delimiter().data(), - options_.line_delimiter().length()); + if (stack_[stack_.size()-2].item_kind_ == stack_item_kind::row_mapping) + { + if (stack_[0].count_ == 0) + { + for (std::size_t i = 0; i < column_names_.size(); ++i) + { + if (i > 0) + { + sink_.push_back(options_.field_delimiter()); + } + sink_.append(column_names_[i].data(), column_names_[i].length()); + } + sink_.append(options_.line_delimiter().data(), + options_.line_delimiter().length()); + } + + //std::cout << "visit_end_array: write row column_names: " << column_names_.size() << "\n"; + for (std::size_t i = 0; i < column_names_.size(); ++i) + { + if (i > 0) + { + sink_.push_back(options_.field_delimiter()); + } + auto it = cname_value_map_.find(column_names_[i]); + if (it != cname_value_map_.end()) + { + sink_.append(it->second.data(),it->second.length()); + it->second.clear(); + } + } + sink_.append(options_.line_delimiter().data(), options_.line_delimiter().length()); + } break; case stack_item_kind::column: ++column_index_; @@ -357,13 +370,6 @@ class basic_csv_encoder final : public basic_json_visitor stack_.back().pathname_ = stack_[stack_.size()-2].pathname_; stack_.back().pathname_.push_back('/'); stack_.back().pathname_.append(std::string(name)); - //std::cout << "visit_key: " << stack_.back().pathname_ << "\n"; - - //cname_value_map_[stack_.back().pathname_] = std::basic_string(); - //if (stack_[0].count_ == 0 && options_.column_names().size() == 0) - //{ - // column_names_.emplace_back(stack_.back().pathname_); - //} break; } case stack_item_kind::column_mapping: @@ -384,6 +390,19 @@ class basic_csv_encoder final : public basic_json_visitor } return true; } + + void append_array_path_component() + { + std::basic_string, Allocator> buffer(alloc_); + stack_.back().pathname_ = stack_[stack_.size()-2].pathname_; + stack_.back().pathname_.push_back('/'); + jsoncons::detail::from_integer(stack_.back().count_, buffer); + stack_.back().pathname_.append(buffer); + if (stack_[0].count_ == 0 && options_.column_names().size() == 0) + { + column_names_.emplace_back(stack_.back().pathname_); + } + } bool visit_null(semantic_tag, const ser_context&, std::error_code&) override { @@ -414,6 +433,20 @@ class basic_csv_encoder final : public basic_json_visitor break; } case stack_item_kind::row: + { + append_array_path_component(); + cname_value_map_[stack_.back().pathname_] = std::basic_string(); + auto it = cname_value_map_.find(stack_.back().pathname_); + if (it != cname_value_map_.end()) + { + std::basic_string s; + jsoncons::string_sink> bo(s); + write_null_value(bo); + bo.flush(); + it->second.append(s); + } + break; + } case stack_item_kind::row_multi_valued_field: write_null_value(sink_); break; @@ -468,6 +501,20 @@ class basic_csv_encoder final : public basic_json_visitor break; } case stack_item_kind::row: + { + append_array_path_component(); + cname_value_map_[stack_.back().pathname_] = std::basic_string(); + auto it = cname_value_map_.find(stack_.back().pathname_); + if (it != cname_value_map_.end()) + { + std::basic_string s; + jsoncons::string_sink> bo(s); + write_string_value(sv,bo); + bo.flush(); + it->second.append(s); + } + break; + } case stack_item_kind::row_multi_valued_field: write_string_value(sv,sink_); break; @@ -578,6 +625,20 @@ class basic_csv_encoder final : public basic_json_visitor break; } case stack_item_kind::row: + { + append_array_path_component(); + cname_value_map_[stack_.back().pathname_] = std::basic_string(); + auto it = cname_value_map_.find(stack_.back().pathname_); + if (it != cname_value_map_.end()) + { + std::basic_string s; + jsoncons::string_sink> bo(s); + write_double_value(val, context, bo, ec); + bo.flush(); + it->second.append(s); + } + break; + } case stack_item_kind::row_multi_valued_field: write_double_value(val, context, sink_, ec); break; @@ -635,6 +696,20 @@ class basic_csv_encoder final : public basic_json_visitor break; } case stack_item_kind::row: + { + append_array_path_component(); + cname_value_map_[stack_.back().pathname_] = std::basic_string(); + auto it = cname_value_map_.find(stack_.back().pathname_); + if (it != cname_value_map_.end()) + { + std::basic_string s; + jsoncons::string_sink> bo(s); + write_int64_value(val,bo); + bo.flush(); + it->second.append(s); + } + break; + } case stack_item_kind::row_multi_valued_field: write_int64_value(val,sink_); break; @@ -692,6 +767,20 @@ class basic_csv_encoder final : public basic_json_visitor break; } case stack_item_kind::row: + { + append_array_path_component(); + cname_value_map_[stack_.back().pathname_] = std::basic_string(); + auto it = cname_value_map_.find(stack_.back().pathname_); + if (it != cname_value_map_.end()) + { + std::basic_string s; + jsoncons::string_sink> bo(s); + write_uint64_value(val, bo); + bo.flush(); + it->second.append(s); + } + break; + } case stack_item_kind::row_multi_valued_field: write_uint64_value(val,sink_); break; @@ -746,6 +835,20 @@ class basic_csv_encoder final : public basic_json_visitor break; } case stack_item_kind::row: + { + append_array_path_component(); + cname_value_map_[stack_.back().pathname_] = std::basic_string(); + auto it = cname_value_map_.find(stack_.back().pathname_); + if (it != cname_value_map_.end()) + { + std::basic_string s; + jsoncons::string_sink> bo(s); + write_bool_value(val,bo); + bo.flush(); + it->second.append(s); + } + break; + } case stack_item_kind::row_multi_valued_field: write_bool_value(val,sink_); break; @@ -914,10 +1017,10 @@ class basic_csv_encoder final : public basic_json_visitor switch (stack_.back().item_kind_) { case stack_item_kind::row: - if (stack_.back().count_ > 0) - { - sink.push_back(options_.field_delimiter()); - } + //if (stack_.back().count_ > 0) + //{ + // sink.push_back(options_.field_delimiter()); + //} break; case stack_item_kind::column: { From f484473e6c636abfbc1deb21d60db43be0ec1b9c Mon Sep 17 00:00:00 2001 From: Daniel Parker Date: Mon, 6 Jan 2025 19:33:26 -0500 Subject: [PATCH 07/79] csv_encoder --- include/jsoncons_ext/csv/csv_encoder.hpp | 106 +++++++++++++++-------- test/csv/src/csv_encoder_tests.cpp | 9 +- 2 files changed, 73 insertions(+), 42 deletions(-) diff --git a/include/jsoncons_ext/csv/csv_encoder.hpp b/include/jsoncons_ext/csv/csv_encoder.hpp index 7466960232..69d4e9f62f 100644 --- a/include/jsoncons_ext/csv/csv_encoder.hpp +++ b/include/jsoncons_ext/csv/csv_encoder.hpp @@ -100,6 +100,7 @@ class basic_csv_encoder final : public basic_json_visitor std::size_t column_index_; std::vector row_counts_; + string_type buffer_; // Noncopyable and nonmoveable basic_csv_encoder(const basic_csv_encoder&) = delete; @@ -119,7 +120,8 @@ class basic_csv_encoder final : public basic_json_visitor alloc_(alloc), stack_(), fp_(options.float_format(), options.precision()), - column_index_(0) + column_index_(0), + buffer_(alloc) { jsoncons::csv::detail::parse_column_names(options.column_names(), column_names_); } @@ -393,12 +395,12 @@ class basic_csv_encoder final : public basic_json_visitor void append_array_path_component() { - std::basic_string, Allocator> buffer(alloc_); stack_.back().pathname_ = stack_[stack_.size()-2].pathname_; stack_.back().pathname_.push_back('/'); - jsoncons::detail::from_integer(stack_.back().count_, buffer); - stack_.back().pathname_.append(buffer); - if (stack_[0].count_ == 0 && options_.column_names().size() == 0) + buffer_.clear(); + jsoncons::detail::from_integer(stack_.back().count_, buffer_); + stack_.back().pathname_.append(buffer_); + if (stack_[0].count_ == 0 && options_.column_names().empty()) { column_names_.emplace_back(stack_.back().pathname_); } @@ -412,11 +414,14 @@ class basic_csv_encoder final : public basic_json_visitor case stack_item_kind::object: case stack_item_kind::object_multi_valued_field: { - if (stack_[0].count_ == 0 && options_.column_names().size() == 0) + if (stack_[0].count_ == 0) { - column_names_.emplace_back(stack_.back().pathname_); + if (options_.column_names().empty()) + { + column_names_.emplace_back(stack_.back().pathname_); + } + cname_value_map_[stack_.back().pathname_] = std::basic_string(); } - cname_value_map_[stack_.back().pathname_] = std::basic_string(); auto it = cname_value_map_.find(stack_.back().pathname_); if (it != cname_value_map_.end()) { @@ -435,7 +440,10 @@ class basic_csv_encoder final : public basic_json_visitor case stack_item_kind::row: { append_array_path_component(); - cname_value_map_[stack_.back().pathname_] = std::basic_string(); + if (stack_[0].count_ == 0) + { + cname_value_map_[stack_.back().pathname_] = std::basic_string(); + } auto it = cname_value_map_.find(stack_.back().pathname_); if (it != cname_value_map_.end()) { @@ -480,11 +488,14 @@ class basic_csv_encoder final : public basic_json_visitor case stack_item_kind::object: case stack_item_kind::object_multi_valued_field: { - if (stack_[0].count_ == 0 && options_.column_names().size() == 0) + if (stack_[0].count_ == 0) { - column_names_.emplace_back(stack_.back().pathname_); + if (options_.column_names().empty()) + { + column_names_.emplace_back(stack_.back().pathname_); + } + cname_value_map_[stack_.back().pathname_] = std::basic_string(); } - cname_value_map_[stack_.back().pathname_] = std::basic_string(); auto it = cname_value_map_.find(stack_.back().pathname_); if (it != cname_value_map_.end()) { @@ -492,18 +503,17 @@ class basic_csv_encoder final : public basic_json_visitor jsoncons::string_sink> bo(s); write_string_value(sv,bo); bo.flush(); - if (!it->second.empty() && options_.subfield_delimiter() != char_type()) - { - it->second.push_back(options_.subfield_delimiter()); - } - it->second.append(s); + cname_value_map_[stack_.back().pathname_] = s; } break; } case stack_item_kind::row: { append_array_path_component(); - cname_value_map_[stack_.back().pathname_] = std::basic_string(); + if (stack_[0].count_ == 0) + { + cname_value_map_[stack_.back().pathname_] = std::basic_string(); + } auto it = cname_value_map_.find(stack_.back().pathname_); if (it != cname_value_map_.end()) { @@ -604,11 +614,14 @@ class basic_csv_encoder final : public basic_json_visitor case stack_item_kind::object: case stack_item_kind::object_multi_valued_field: { - if (stack_[0].count_ == 0 && options_.column_names().size() == 0) + if (stack_[0].count_ == 0) { - column_names_.emplace_back(stack_.back().pathname_); + if (options_.column_names().empty()) + { + column_names_.emplace_back(stack_.back().pathname_); + } + cname_value_map_[stack_.back().pathname_] = std::basic_string(); } - cname_value_map_[stack_.back().pathname_] = std::basic_string(); auto it = cname_value_map_.find(stack_.back().pathname_); if (it != cname_value_map_.end()) { @@ -627,7 +640,10 @@ class basic_csv_encoder final : public basic_json_visitor case stack_item_kind::row: { append_array_path_component(); - cname_value_map_[stack_.back().pathname_] = std::basic_string(); + if (stack_[0].count_ == 0) + { + cname_value_map_[stack_.back().pathname_] = std::basic_string(); + } auto it = cname_value_map_.find(stack_.back().pathname_); if (it != cname_value_map_.end()) { @@ -675,11 +691,14 @@ class basic_csv_encoder final : public basic_json_visitor case stack_item_kind::object: case stack_item_kind::object_multi_valued_field: { - if (stack_[0].count_ == 0 && options_.column_names().size() == 0) + if (stack_[0].count_ == 0) { - column_names_.emplace_back(stack_.back().pathname_); + if (options_.column_names().empty()) + { + column_names_.emplace_back(stack_.back().pathname_); + } + cname_value_map_[stack_.back().pathname_] = std::basic_string(); } - cname_value_map_[stack_.back().pathname_] = std::basic_string(); auto it = cname_value_map_.find(stack_.back().pathname_); if (it != cname_value_map_.end()) { @@ -698,7 +717,10 @@ class basic_csv_encoder final : public basic_json_visitor case stack_item_kind::row: { append_array_path_component(); - cname_value_map_[stack_.back().pathname_] = std::basic_string(); + if (stack_[0].count_ == 0) + { + cname_value_map_[stack_.back().pathname_] = std::basic_string(); + } auto it = cname_value_map_.find(stack_.back().pathname_); if (it != cname_value_map_.end()) { @@ -746,11 +768,14 @@ class basic_csv_encoder final : public basic_json_visitor case stack_item_kind::object: case stack_item_kind::object_multi_valued_field: { - if (stack_[0].count_ == 0 && options_.column_names().size() == 0) + if (stack_[0].count_ == 0) { - column_names_.emplace_back(stack_.back().pathname_); + if (options_.column_names().empty()) + { + column_names_.emplace_back(stack_.back().pathname_); + } + cname_value_map_[stack_.back().pathname_] = std::basic_string(); } - cname_value_map_[stack_.back().pathname_] = std::basic_string(); auto it = cname_value_map_.find(stack_.back().pathname_); if (it != cname_value_map_.end()) { @@ -769,7 +794,10 @@ class basic_csv_encoder final : public basic_json_visitor case stack_item_kind::row: { append_array_path_component(); - cname_value_map_[stack_.back().pathname_] = std::basic_string(); + if (stack_[0].count_ == 0) + { + cname_value_map_[stack_.back().pathname_] = std::basic_string(); + } auto it = cname_value_map_.find(stack_.back().pathname_); if (it != cname_value_map_.end()) { @@ -814,11 +842,14 @@ class basic_csv_encoder final : public basic_json_visitor case stack_item_kind::object: case stack_item_kind::object_multi_valued_field: { - if (stack_[0].count_ == 0 && options_.column_names().size() == 0) + if (stack_[0].count_ == 0) { - column_names_.emplace_back(stack_.back().pathname_); + if (options_.column_names().empty()) + { + column_names_.emplace_back(stack_.back().pathname_); + } + cname_value_map_[stack_.back().pathname_] = std::basic_string(); } - cname_value_map_[stack_.back().pathname_] = std::basic_string(); auto it = cname_value_map_.find(stack_.back().pathname_); if (it != cname_value_map_.end()) { @@ -837,7 +868,10 @@ class basic_csv_encoder final : public basic_json_visitor case stack_item_kind::row: { append_array_path_component(); - cname_value_map_[stack_.back().pathname_] = std::basic_string(); + if (stack_[0].count_ == 0) + { + cname_value_map_[stack_.back().pathname_] = std::basic_string(); + } auto it = cname_value_map_.find(stack_.back().pathname_); if (it != cname_value_map_.end()) { @@ -1017,10 +1051,6 @@ class basic_csv_encoder final : public basic_json_visitor switch (stack_.back().item_kind_) { case stack_item_kind::row: - //if (stack_.back().count_ > 0) - //{ - // sink.push_back(options_.field_delimiter()); - //} break; case stack_item_kind::column: { diff --git a/test/csv/src/csv_encoder_tests.cpp b/test/csv/src/csv_encoder_tests.cpp index d97af10e36..48652530a7 100644 --- a/test/csv/src/csv_encoder_tests.cpp +++ b/test/csv/src/csv_encoder_tests.cpp @@ -59,7 +59,10 @@ true,1948-01-01T14:57:13,1.27,1948-01-01,63,14:57:13,Chicago Sun-Times SECTION("array of arrays to csv") { - std::string expected = R"()"; + std::string expected = R"(/0,/1,/2,/3,/3/0,/3/0/0,/3/0/1 +Chicago Reader,1.0,1971-01-01T04:14:00,true,04:14:00,1971-01-01,40 +Chicago Sun-Times,1.27,1948-01-01T14:57:13,true,14:57:13,1948-01-01,63 +)"; std::string jtext = R"( [ @@ -99,9 +102,7 @@ true,1948-01-01T14:57:13,1.27,1948-01-01,63,14:57:13,Chicago Sun-Times csv::csv_string_encoder encoder(buf); j.dump(encoder); - std::cout << buf << "\n"; - - //CHECK(expected == buf); + CHECK(expected == buf); } } From 337a5ce27bbc734945a7256c84fa7dab5dfc4dae Mon Sep 17 00:00:00 2001 From: Daniel Parker Date: Wed, 8 Jan 2025 23:29:14 -0500 Subject: [PATCH 08/79] generalized csv --- include/jsoncons_ext/csv/csv_encoder.hpp | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/include/jsoncons_ext/csv/csv_encoder.hpp b/include/jsoncons_ext/csv/csv_encoder.hpp index 69d4e9f62f..70e3e9c6ba 100644 --- a/include/jsoncons_ext/csv/csv_encoder.hpp +++ b/include/jsoncons_ext/csv/csv_encoder.hpp @@ -1,25 +1,27 @@ -// Copyright 2013-2024 Daniel Parker +// Copyright 2013-2025 Daniel Parker // Distributed under the Boost license, Version 1.0. // (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) // See https://github.com/danielaparker/jsoncons for latest version -#ifndef JSONCONS_CSV_CSV_ENCODER_HPP -#define JSONCONS_CSV_CSV_ENCODER_HPP +#ifndef JSONCONS_EXT_CSV_CSV_ENCODER_HPP +#define JSONCONS_EXT_CSV_CSV_ENCODER_HPP #include // std::array -#include -#include +#include // std::numeric_limits +#include // std::allocator #include -#include // std::move +#include #include // std::unordered_map -#include // std::allocator -#include // std::numeric_limits +#include // std::move +#include + +#include +#include #include #include -#include -#include #include +#include namespace jsoncons { namespace csv { From 6169758944e11db53fc455ea6e52218ae01a338a Mon Sep 17 00:00:00 2001 From: Daniel Parker Date: Thu, 9 Jan 2025 15:36:05 -0500 Subject: [PATCH 09/79] merge --- include/jsoncons_ext/csv/csv_encoder.hpp.orig | 1145 +++++++++++++++++ 1 file changed, 1145 insertions(+) create mode 100644 include/jsoncons_ext/csv/csv_encoder.hpp.orig diff --git a/include/jsoncons_ext/csv/csv_encoder.hpp.orig b/include/jsoncons_ext/csv/csv_encoder.hpp.orig new file mode 100644 index 0000000000..60fc03fe41 --- /dev/null +++ b/include/jsoncons_ext/csv/csv_encoder.hpp.orig @@ -0,0 +1,1145 @@ +// Copyright 2013-2025 Daniel Parker +// Distributed under the Boost license, Version 1.0. +// (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +// See https://github.com/danielaparker/jsoncons for latest version + +#ifndef JSONCONS_EXT_CSV_CSV_ENCODER_HPP +#define JSONCONS_EXT_CSV_CSV_ENCODER_HPP + +#include // std::array +#include // std::numeric_limits +#include // std::allocator +#include +#include +#include // std::unordered_map +#include // std::move +#include + +#include +#include +#include +#include +#include +#include + +namespace jsoncons { namespace csv { + +template ,typename Allocator=std::allocator> +class basic_csv_encoder final : public basic_json_visitor +{ +public: + using char_type = CharT; + using typename basic_json_visitor::string_view_type; + using sink_type = Sink; + + using allocator_type = Allocator; + using char_allocator_type = typename std::allocator_traits:: template rebind_alloc; + using string_type = std::basic_string, char_allocator_type>; + using string_allocator_type = typename std::allocator_traits:: template rebind_alloc; + using string_string_allocator_type = typename std::allocator_traits:: template rebind_alloc>; + +private: + static jsoncons::basic_string_view null_constant() + { + static jsoncons::basic_string_view k = JSONCONS_STRING_VIEW_CONSTANT(CharT,"null"); + return k; + } + static jsoncons::basic_string_view true_constant() + { + static jsoncons::basic_string_view k = JSONCONS_STRING_VIEW_CONSTANT(CharT,"true"); + return k; + } + static jsoncons::basic_string_view false_constant() + { + static jsoncons::basic_string_view k = JSONCONS_STRING_VIEW_CONSTANT(CharT,"false"); + return k; + } + + enum class stack_item_kind + { + row_mapping, + column_mapping, + object, + row, + column, + object_multi_valued_field, + row_multi_valued_field, + column_multi_valued_field + }; + + struct stack_item + { + stack_item_kind item_kind_; + std::size_t count_; + std::string pathname_; + + stack_item(stack_item_kind item_kind) noexcept + : item_kind_(item_kind), pathname_{}, count_(0) + { + } + + bool is_object() const + { + return item_kind_ == stack_item_kind::object; + } + + stack_item_kind item_kind() const + { + return item_kind_; + } + }; + + Sink sink_; + const basic_csv_encode_options options_; + allocator_type alloc_; + + std::vector stack_; + jsoncons::detail::write_double fp_; + + std::vector column_names_; + std::unordered_map,std::equal_to,string_string_allocator_type> cname_value_map_; + + std::size_t column_index_; + std::vector row_counts_; + string_type buffer_; + + // Noncopyable and nonmoveable + basic_csv_encoder(const basic_csv_encoder&) = delete; + basic_csv_encoder& operator=(const basic_csv_encoder&) = delete; +public: + basic_csv_encoder(Sink&& sink, + const Allocator& alloc = Allocator()) + : basic_csv_encoder(std::forward(sink), basic_csv_encode_options(), alloc) + { + } + + basic_csv_encoder(Sink&& sink, + const basic_csv_encode_options& options, + const Allocator& alloc = Allocator()) + : sink_(std::forward(sink)), + options_(options), + alloc_(alloc), + stack_(), + fp_(options.float_format(), options.precision()), + column_index_(0), + buffer_(alloc) + { + jsoncons::csv::detail::parse_column_names(options.column_names(), column_names_); + } + + ~basic_csv_encoder() noexcept + { + JSONCONS_TRY + { + sink_.flush(); + } + JSONCONS_CATCH(...) + { + } + } + + void reset() + { + stack_.clear(); + column_names_.clear(); + cname_value_map_.clear(); + column_index_ = 0; + row_counts_.clear(); + } + + void reset(Sink&& sink) + { + sink_ = std::move(sink); + reset(); + } + +private: + + template + void escape_string(const CharT* s, + std::size_t length, + CharT quote_char, CharT quote_escape_char, + AnyWriter& sink) + { + const CharT* begin = s; + const CharT* end = s + length; + for (const CharT* it = begin; it != end; ++it) + { + CharT c = *it; + if (c == quote_char) + { + sink.push_back(quote_escape_char); + sink.push_back(quote_char); + } + else + { + sink.push_back(c); + } + } + } + + void visit_flush() override + { + sink_.flush(); + } + + bool visit_begin_object(semantic_tag, const ser_context&, std::error_code& ec) override + { + if (stack_.empty()) + { + stack_.emplace_back(stack_item_kind::column_mapping); + return true; + } + switch (stack_.back().item_kind_) + { + case stack_item_kind::row_mapping: + stack_.emplace_back(stack_item_kind::object); + return true; + case stack_item_kind::object: + stack_.emplace_back(stack_item_kind::object); + return true; + default: // error + ec = csv_errc::source_error; + return false; + } + } + + bool visit_end_object(const ser_context&, std::error_code&) override + { + JSONCONS_ASSERT(!stack_.empty()); + + switch (stack_.back().item_kind_) + { + case stack_item_kind::object: + if (stack_[stack_.size()-2].item_kind_ == stack_item_kind::row_mapping) + { + if (stack_[0].count_ == 0) + { + for (std::size_t i = 0; i < column_names_.size(); ++i) + { + if (i > 0) + { + sink_.push_back(options_.field_delimiter()); + } + sink_.append(column_names_[i].data(), + column_names_[i].length()); + } + sink_.append(options_.line_delimiter().data(), + options_.line_delimiter().length()); + } + for (std::size_t i = 0; i < column_names_.size(); ++i) + { + if (i > 0) + { + sink_.push_back(options_.field_delimiter()); + } + auto it = cname_value_map_.find(column_names_[i]); + if (it != cname_value_map_.end()) + { + sink_.append(it->second.data(),it->second.length()); + it->second.clear(); + } + } + sink_.append(options_.line_delimiter().data(), options_.line_delimiter().length()); + } +<<<<<<< HEAD +======= + for (std::size_t i = 0; i < strings_buffer_.size(); ++i) + { + if (i > 0) + { + sink_.push_back(options_.field_delimiter()); + } + auto it = buffered_line_.find(strings_buffer_[i]); + if (it != buffered_line_.end()) + { + sink_.append((*it).second.data(),(*it).second.length()); + (*it).second.clear(); + } + } + sink_.append(options_.line_delimiter().data(), options_.line_delimiter().length()); +>>>>>>> eee098be8f774f64527281d5beb9e90aa9b73e06 + break; + case stack_item_kind::column_mapping: + { + for (const auto& item : column_names_) + { + sink_.append(item.data(), item.size()); + sink_.append(options_.line_delimiter().data(), options_.line_delimiter().length()); + } + break; + } + default: + break; + } + stack_.pop_back(); + if (!stack_.empty()) + { + end_value(); + } + return true; + } + + bool visit_begin_array(semantic_tag, const ser_context&, std::error_code& ec) override + { + if (stack_.empty()) + { + stack_.emplace_back(stack_item_kind::row_mapping); + return true; + } + + switch (stack_.back().item_kind_) + { + case stack_item_kind::row_mapping: + stack_.emplace_back(stack_item_kind::row); + return true; + case stack_item_kind::object: + stack_.emplace_back(stack_item_kind::object_multi_valued_field); + return true; + case stack_item_kind::column_mapping: + stack_.emplace_back(stack_item_kind::column); + row_counts_.push_back(1); + if (column_names_.size() <= row_counts_.back()) + { + column_names_.emplace_back(); + } + return true; + case stack_item_kind::column: + { + if (column_names_.size() <= row_counts_.back()) + { + column_names_.emplace_back(); + } + jsoncons::string_sink> bo(column_names_[row_counts_.back()]); + begin_value(bo); + stack_.emplace_back(stack_item_kind::column_multi_valued_field); + return true; + } + case stack_item_kind::row: + begin_value(sink_); + //stack_.emplace_back(stack_item_kind::row_multi_valued_field); + stack_.emplace_back(stack_item_kind::row); + return true; + default: // error + ec = csv_errc::source_error; + return false; + } + } + + bool visit_end_array(const ser_context&, std::error_code&) override + { + JSONCONS_ASSERT(!stack_.empty()); + switch (stack_.back().item_kind_) + { + case stack_item_kind::row: + if (stack_[stack_.size()-2].item_kind_ == stack_item_kind::row_mapping) + { + if (stack_[0].count_ == 0) + { + for (std::size_t i = 0; i < column_names_.size(); ++i) + { + if (i > 0) + { + sink_.push_back(options_.field_delimiter()); + } + sink_.append(column_names_[i].data(), column_names_[i].length()); + } + sink_.append(options_.line_delimiter().data(), + options_.line_delimiter().length()); + } + + //std::cout << "visit_end_array: write row column_names: " << column_names_.size() << "\n"; + for (std::size_t i = 0; i < column_names_.size(); ++i) + { + if (i > 0) + { + sink_.push_back(options_.field_delimiter()); + } + auto it = cname_value_map_.find(column_names_[i]); + if (it != cname_value_map_.end()) + { + sink_.append(it->second.data(),it->second.length()); + it->second.clear(); + } + } + sink_.append(options_.line_delimiter().data(), options_.line_delimiter().length()); + } + break; + case stack_item_kind::column: + ++column_index_; + break; + default: + break; + } + stack_.pop_back(); + + if (!stack_.empty()) + { + end_value(); + } + return true; + } + + bool visit_key(const string_view_type& name, const ser_context&, std::error_code&) override + { + JSONCONS_ASSERT(!stack_.empty()); + switch (stack_.back().item_kind_) + { + case stack_item_kind::object: + { + stack_.back().pathname_ = stack_[stack_.size()-2].pathname_; + stack_.back().pathname_.push_back('/'); + stack_.back().pathname_.append(std::string(name)); + break; + } + case stack_item_kind::column_mapping: + { + if (column_names_.empty()) + { + column_names_.emplace_back(name); + } + else + { + column_names_[0].push_back(options_.field_delimiter()); + column_names_[0].append(string_type(name)); + } + break; + } + default: + break; + } + return true; + } + + void append_array_path_component() + { + stack_.back().pathname_ = stack_[stack_.size()-2].pathname_; + stack_.back().pathname_.push_back('/'); + buffer_.clear(); + jsoncons::detail::from_integer(stack_.back().count_, buffer_); + stack_.back().pathname_.append(buffer_); + if (stack_[0].count_ == 0 && options_.column_names().empty()) + { + column_names_.emplace_back(stack_.back().pathname_); + } + } + + bool visit_null(semantic_tag, const ser_context&, std::error_code&) override + { + JSONCONS_ASSERT(!stack_.empty()); + switch (stack_.back().item_kind_) + { + case stack_item_kind::object: + case stack_item_kind::object_multi_valued_field: + { + if (stack_[0].count_ == 0) + { + if (options_.column_names().empty()) + { + column_names_.emplace_back(stack_.back().pathname_); + } + cname_value_map_[stack_.back().pathname_] = std::basic_string(); + } + auto it = cname_value_map_.find(stack_.back().pathname_); + if (it != cname_value_map_.end()) + { + std::basic_string s; + jsoncons::string_sink> bo(s); + write_null_value(bo); + bo.flush(); + if (!(*it).second.empty() && options_.subfield_delimiter() != char_type()) + { + (*it).second.push_back(options_.subfield_delimiter()); + } + (*it).second.append(s); + } + break; + } + case stack_item_kind::row: + { + append_array_path_component(); + if (stack_[0].count_ == 0) + { + cname_value_map_[stack_.back().pathname_] = std::basic_string(); + } + auto it = cname_value_map_.find(stack_.back().pathname_); + if (it != cname_value_map_.end()) + { + std::basic_string s; + jsoncons::string_sink> bo(s); + write_null_value(bo); + bo.flush(); + it->second.append(s); + } + break; + } + case stack_item_kind::row_multi_valued_field: + write_null_value(sink_); + break; + case stack_item_kind::column: + { + if (column_names_.size() <= row_counts_.back()) + { + column_names_.emplace_back(); + } + jsoncons::string_sink> bo(column_names_[row_counts_.back()]); + write_null_value(bo); + break; + } + case stack_item_kind::column_multi_valued_field: + { + jsoncons::string_sink> bo(column_names_[row_counts_.back()]); + write_null_value(bo); + break; + } + default: + break; + } + return true; + } + + bool visit_string(const string_view_type& sv, semantic_tag, const ser_context&, std::error_code&) override + { + JSONCONS_ASSERT(!stack_.empty()); + switch (stack_.back().item_kind_) + { + case stack_item_kind::object: + case stack_item_kind::object_multi_valued_field: + { + if (stack_[0].count_ == 0) + { + if (options_.column_names().empty()) + { + column_names_.emplace_back(stack_.back().pathname_); + } + cname_value_map_[stack_.back().pathname_] = std::basic_string(); + } + auto it = cname_value_map_.find(stack_.back().pathname_); + if (it != cname_value_map_.end()) + { + std::basic_string s; + jsoncons::string_sink> bo(s); + write_string_value(sv,bo); + bo.flush(); +<<<<<<< HEAD + cname_value_map_[stack_.back().pathname_] = s; +======= + if (!(*it).second.empty() && options_.subfield_delimiter() != char_type()) + { + (*it).second.push_back(options_.subfield_delimiter()); + } + (*it).second.append(s); +>>>>>>> eee098be8f774f64527281d5beb9e90aa9b73e06 + } + break; + } + case stack_item_kind::row: + { + append_array_path_component(); + if (stack_[0].count_ == 0) + { + cname_value_map_[stack_.back().pathname_] = std::basic_string(); + } + auto it = cname_value_map_.find(stack_.back().pathname_); + if (it != cname_value_map_.end()) + { + std::basic_string s; + jsoncons::string_sink> bo(s); + write_string_value(sv,bo); + bo.flush(); + it->second.append(s); + } + break; + } + case stack_item_kind::row_multi_valued_field: + write_string_value(sv,sink_); + break; + case stack_item_kind::column: + { + if (column_names_.size() <= row_counts_.back()) + { + column_names_.emplace_back(); + } + jsoncons::string_sink> bo(column_names_[row_counts_.back()]); + write_string_value(sv,bo); + break; + } + case stack_item_kind::column_multi_valued_field: + { + jsoncons::string_sink> bo(column_names_[row_counts_.back()]); + write_string_value(sv,bo); + break; + } + default: + break; + } + return true; + } + + bool visit_byte_string(const byte_string_view& b, + semantic_tag tag, + const ser_context& context, + std::error_code& ec) override + { + byte_string_chars_format encoding_hint; + switch (tag) + { + case semantic_tag::base16: + encoding_hint = byte_string_chars_format::base16; + break; + case semantic_tag::base64: + encoding_hint = byte_string_chars_format::base64; + break; + case semantic_tag::base64url: + encoding_hint = byte_string_chars_format::base64url; + break; + default: + encoding_hint = byte_string_chars_format::none; + break; + } + byte_string_chars_format format = jsoncons::detail::resolve_byte_string_chars_format(encoding_hint,byte_string_chars_format::none,byte_string_chars_format::base64url); + + std::basic_string s; + switch (format) + { + case byte_string_chars_format::base16: + { + encode_base16(b.begin(),b.end(),s); + visit_string(s, semantic_tag::none, context, ec); + break; + } + case byte_string_chars_format::base64: + { + encode_base64(b.begin(),b.end(),s); + visit_string(s, semantic_tag::none, context, ec); + break; + } + case byte_string_chars_format::base64url: + { + encode_base64url(b.begin(),b.end(),s); + visit_string(s, semantic_tag::none, context, ec); + break; + } + default: + { + JSONCONS_UNREACHABLE(); + } + } + + return true; + } + + bool visit_double(double val, + semantic_tag, + const ser_context& context, + std::error_code& ec) override + { + JSONCONS_ASSERT(!stack_.empty()); + switch (stack_.back().item_kind_) + { + case stack_item_kind::object: + case stack_item_kind::object_multi_valued_field: + { + if (stack_[0].count_ == 0) + { + if (options_.column_names().empty()) + { + column_names_.emplace_back(stack_.back().pathname_); + } + cname_value_map_[stack_.back().pathname_] = std::basic_string(); + } + auto it = cname_value_map_.find(stack_.back().pathname_); + if (it != cname_value_map_.end()) + { + std::basic_string s; + jsoncons::string_sink> bo(s); + write_double_value(val, context, bo, ec); + bo.flush(); + if (!(*it).second.empty() && options_.subfield_delimiter() != char_type()) + { + (*it).second.push_back(options_.subfield_delimiter()); + } + (*it).second.append(s); + } + break; + } + case stack_item_kind::row: + { + append_array_path_component(); + if (stack_[0].count_ == 0) + { + cname_value_map_[stack_.back().pathname_] = std::basic_string(); + } + auto it = cname_value_map_.find(stack_.back().pathname_); + if (it != cname_value_map_.end()) + { + std::basic_string s; + jsoncons::string_sink> bo(s); + write_double_value(val, context, bo, ec); + bo.flush(); + it->second.append(s); + } + break; + } + case stack_item_kind::row_multi_valued_field: + write_double_value(val, context, sink_, ec); + break; + case stack_item_kind::column: + { + if (column_names_.size() <= row_counts_.back()) + { + column_names_.emplace_back(); + } + jsoncons::string_sink> bo(column_names_[row_counts_.back()]); + write_double_value(val, context, bo, ec); + break; + } + case stack_item_kind::column_multi_valued_field: + { + jsoncons::string_sink> bo(column_names_[row_counts_.back()]); + write_double_value(val, context, bo, ec); + break; + } + default: + break; + } + return true; + } + + bool visit_int64(int64_t val, + semantic_tag, + const ser_context&, + std::error_code&) override + { + JSONCONS_ASSERT(!stack_.empty()); + switch (stack_.back().item_kind_) + { + case stack_item_kind::object: + case stack_item_kind::object_multi_valued_field: + { + if (stack_[0].count_ == 0) + { + if (options_.column_names().empty()) + { + column_names_.emplace_back(stack_.back().pathname_); + } + cname_value_map_[stack_.back().pathname_] = std::basic_string(); + } + auto it = cname_value_map_.find(stack_.back().pathname_); + if (it != cname_value_map_.end()) + { + std::basic_string s; + jsoncons::string_sink> bo(s); + write_int64_value(val,bo); + bo.flush(); + if (!(*it).second.empty() && options_.subfield_delimiter() != char_type()) + { + (*it).second.push_back(options_.subfield_delimiter()); + } + (*it).second.append(s); + } + break; + } + case stack_item_kind::row: + { + append_array_path_component(); + if (stack_[0].count_ == 0) + { + cname_value_map_[stack_.back().pathname_] = std::basic_string(); + } + auto it = cname_value_map_.find(stack_.back().pathname_); + if (it != cname_value_map_.end()) + { + std::basic_string s; + jsoncons::string_sink> bo(s); + write_int64_value(val,bo); + bo.flush(); + it->second.append(s); + } + break; + } + case stack_item_kind::row_multi_valued_field: + write_int64_value(val,sink_); + break; + case stack_item_kind::column: + { + if (column_names_.size() <= row_counts_.back()) + { + column_names_.emplace_back(); + } + jsoncons::string_sink> bo(column_names_[row_counts_.back()]); + write_int64_value(val, bo); + break; + } + case stack_item_kind::column_multi_valued_field: + { + jsoncons::string_sink> bo(column_names_[row_counts_.back()]); + write_int64_value(val, bo); + break; + } + default: + break; + } + return true; + } + + bool visit_uint64(uint64_t val, + semantic_tag, + const ser_context&, + std::error_code&) override + { + JSONCONS_ASSERT(!stack_.empty()); + switch (stack_.back().item_kind_) + { + case stack_item_kind::object: + case stack_item_kind::object_multi_valued_field: + { + if (stack_[0].count_ == 0) + { + if (options_.column_names().empty()) + { + column_names_.emplace_back(stack_.back().pathname_); + } + cname_value_map_[stack_.back().pathname_] = std::basic_string(); + } + auto it = cname_value_map_.find(stack_.back().pathname_); + if (it != cname_value_map_.end()) + { + std::basic_string s; + jsoncons::string_sink> bo(s); + write_uint64_value(val, bo); + bo.flush(); + if (!(*it).second.empty() && options_.subfield_delimiter() != char_type()) + { + (*it).second.push_back(options_.subfield_delimiter()); + } + (*it).second.append(s); + } + break; + } + case stack_item_kind::row: + { + append_array_path_component(); + if (stack_[0].count_ == 0) + { + cname_value_map_[stack_.back().pathname_] = std::basic_string(); + } + auto it = cname_value_map_.find(stack_.back().pathname_); + if (it != cname_value_map_.end()) + { + std::basic_string s; + jsoncons::string_sink> bo(s); + write_uint64_value(val, bo); + bo.flush(); + it->second.append(s); + } + break; + } + case stack_item_kind::row_multi_valued_field: + write_uint64_value(val,sink_); + break; + case stack_item_kind::column: + { + if (column_names_.size() <= row_counts_.back()) + { + column_names_.emplace_back(); + } + jsoncons::string_sink> bo(column_names_[row_counts_.back()]); + write_uint64_value(val, bo); + break; + } + case stack_item_kind::column_multi_valued_field: + { + jsoncons::string_sink> bo(column_names_[row_counts_.back()]); + write_uint64_value(val, bo); + break; + } + default: + break; + } + return true; + } + + bool visit_bool(bool val, semantic_tag, const ser_context&, std::error_code&) override + { + JSONCONS_ASSERT(!stack_.empty()); + switch (stack_.back().item_kind_) + { + case stack_item_kind::object: + case stack_item_kind::object_multi_valued_field: + { + if (stack_[0].count_ == 0) + { + if (options_.column_names().empty()) + { + column_names_.emplace_back(stack_.back().pathname_); + } + cname_value_map_[stack_.back().pathname_] = std::basic_string(); + } + auto it = cname_value_map_.find(stack_.back().pathname_); + if (it != cname_value_map_.end()) + { + std::basic_string s; + jsoncons::string_sink> bo(s); + write_bool_value(val,bo); + bo.flush(); + if (!(*it).second.empty() && options_.subfield_delimiter() != char_type()) + { + (*it).second.push_back(options_.subfield_delimiter()); + } + (*it).second.append(s); + } + break; + } + case stack_item_kind::row: + { + append_array_path_component(); + if (stack_[0].count_ == 0) + { + cname_value_map_[stack_.back().pathname_] = std::basic_string(); + } + auto it = cname_value_map_.find(stack_.back().pathname_); + if (it != cname_value_map_.end()) + { + std::basic_string s; + jsoncons::string_sink> bo(s); + write_bool_value(val,bo); + bo.flush(); + it->second.append(s); + } + break; + } + case stack_item_kind::row_multi_valued_field: + write_bool_value(val,sink_); + break; + case stack_item_kind::column: + { + if (column_names_.size() <= row_counts_.back()) + { + column_names_.emplace_back(); + } + jsoncons::string_sink> bo(column_names_[row_counts_.back()]); + write_bool_value(val, bo); + break; + } + case stack_item_kind::column_multi_valued_field: + { + jsoncons::string_sink> bo(column_names_[row_counts_.back()]); + write_bool_value(val, bo); + break; + } + default: + break; + } + return true; + } + + template + bool do_string_value(const CharT* s, std::size_t length, AnyWriter& sink) + { + bool quote = false; + if (options_.quote_style() == quote_style_kind::all || options_.quote_style() == quote_style_kind::nonnumeric || + (options_.quote_style() == quote_style_kind::minimal && + (std::char_traits::find(s, length, options_.field_delimiter()) != nullptr || std::char_traits::find(s, length, options_.quote_char()) != nullptr))) + { + quote = true; + sink.push_back(options_.quote_char()); + } + escape_string(s, length, options_.quote_char(), options_.quote_escape_char(), sink); + if (quote) + { + sink.push_back(options_.quote_char()); + } + + return true; + } + + template + void write_string_value(const string_view_type& value, AnyWriter& sink) + { + begin_value(sink); + do_string_value(value.data(),value.length(),sink); + end_value(); + } + + template + void write_double_value(double val, const ser_context& context, AnyWriter& sink, std::error_code& ec) + { + begin_value(sink); + + if (!std::isfinite(val)) + { + if ((std::isnan)(val)) + { + if (options_.enable_nan_to_num()) + { + sink.append(options_.nan_to_num().data(), options_.nan_to_num().length()); + } + else if (options_.enable_nan_to_str()) + { + visit_string(options_.nan_to_str(), semantic_tag::none, context, ec); + } + else + { + sink.append(null_constant().data(), null_constant().size()); + } + } + else if (val == std::numeric_limits::infinity()) + { + if (options_.enable_inf_to_num()) + { + sink.append(options_.inf_to_num().data(), options_.inf_to_num().length()); + } + else if (options_.enable_inf_to_str()) + { + visit_string(options_.inf_to_str(), semantic_tag::none, context, ec); + } + else + { + sink.append(null_constant().data(), null_constant().size()); + } + } + else + { + if (options_.enable_neginf_to_num()) + { + sink.append(options_.neginf_to_num().data(), options_.neginf_to_num().length()); + } + else if (options_.enable_neginf_to_str()) + { + visit_string(options_.neginf_to_str(), semantic_tag::none, context, ec); + } + else + { + sink.append(null_constant().data(), null_constant().size()); + } + } + } + else + { + fp_(val, sink); + } + + end_value(); + + } + + template + void write_int64_value(int64_t val, AnyWriter& sink) + { + begin_value(sink); + + jsoncons::detail::from_integer(val,sink); + + end_value(); + } + + template + void write_uint64_value(uint64_t val, AnyWriter& sink) + { + begin_value(sink); + + jsoncons::detail::from_integer(val,sink); + + end_value(); + } + + template + void write_bool_value(bool val, AnyWriter& sink) + { + begin_value(sink); + + if (val) + { + sink.append(true_constant().data(), true_constant().size()); + } + else + { + sink.append(false_constant().data(), false_constant().size()); + } + + end_value(); + } + + template + bool write_null_value(AnyWriter& sink) + { + begin_value(sink); + sink.append(null_constant().data(), null_constant().size()); + end_value(); + return true; + } + + template + void begin_value(AnyWriter& sink) + { + JSONCONS_ASSERT(!stack_.empty()); + switch (stack_.back().item_kind_) + { + case stack_item_kind::row: + break; + case stack_item_kind::column: + { + if (row_counts_.size() >= 3) + { + for (std::size_t i = row_counts_.size()-2; i-- > 0;) + { + if (row_counts_[i] <= row_counts_.back()) + { + sink.push_back(options_.field_delimiter()); + } + else + { + break; + } + } + } + if (column_index_ > 0) + { + sink.push_back(options_.field_delimiter()); + } + break; + } + case stack_item_kind::row_multi_valued_field: + case stack_item_kind::column_multi_valued_field: + if (stack_.back().count_ > 0 && options_.subfield_delimiter() != char_type()) + { + sink.push_back(options_.subfield_delimiter()); + } + break; + default: + break; + } + } + + void end_value() + { + JSONCONS_ASSERT(!stack_.empty()); + switch(stack_.back().item_kind_) + { + case stack_item_kind::row: + { + ++stack_.back().count_; + break; + } + case stack_item_kind::column: + { + ++row_counts_.back(); + break; + } + default: + ++stack_.back().count_; + break; + } + } +}; + +using csv_stream_encoder = basic_csv_encoder; +using csv_string_encoder = basic_csv_encoder>; +using csv_wstream_encoder = basic_csv_encoder; +using wcsv_string_encoder = basic_csv_encoder>; + +}} + +#endif From f7a90f549bf0172ce58fb1225c984d2d4e58196a Mon Sep 17 00:00:00 2001 From: Daniel Parker Date: Thu, 9 Jan 2025 15:37:25 -0500 Subject: [PATCH 10/79] merge --- include/jsoncons_ext/csv/csv_encoder.hpp | 55 +- include/jsoncons_ext/csv/csv_encoder.hpp.orig | 1145 ----------------- 2 files changed, 15 insertions(+), 1185 deletions(-) delete mode 100644 include/jsoncons_ext/csv/csv_encoder.hpp.orig diff --git a/include/jsoncons_ext/csv/csv_encoder.hpp b/include/jsoncons_ext/csv/csv_encoder.hpp index 60fc03fe41..70e3e9c6ba 100644 --- a/include/jsoncons_ext/csv/csv_encoder.hpp +++ b/include/jsoncons_ext/csv/csv_encoder.hpp @@ -243,23 +243,6 @@ class basic_csv_encoder final : public basic_json_visitor } sink_.append(options_.line_delimiter().data(), options_.line_delimiter().length()); } -<<<<<<< HEAD -======= - for (std::size_t i = 0; i < strings_buffer_.size(); ++i) - { - if (i > 0) - { - sink_.push_back(options_.field_delimiter()); - } - auto it = buffered_line_.find(strings_buffer_[i]); - if (it != buffered_line_.end()) - { - sink_.append((*it).second.data(),(*it).second.length()); - (*it).second.clear(); - } - } - sink_.append(options_.line_delimiter().data(), options_.line_delimiter().length()); ->>>>>>> eee098be8f774f64527281d5beb9e90aa9b73e06 break; case stack_item_kind::column_mapping: { @@ -448,11 +431,11 @@ class basic_csv_encoder final : public basic_json_visitor jsoncons::string_sink> bo(s); write_null_value(bo); bo.flush(); - if (!(*it).second.empty() && options_.subfield_delimiter() != char_type()) + if (!it->second.empty() && options_.subfield_delimiter() != char_type()) { - (*it).second.push_back(options_.subfield_delimiter()); + it->second.push_back(options_.subfield_delimiter()); } - (*it).second.append(s); + it->second.append(s); } break; } @@ -522,15 +505,7 @@ class basic_csv_encoder final : public basic_json_visitor jsoncons::string_sink> bo(s); write_string_value(sv,bo); bo.flush(); -<<<<<<< HEAD cname_value_map_[stack_.back().pathname_] = s; -======= - if (!(*it).second.empty() && options_.subfield_delimiter() != char_type()) - { - (*it).second.push_back(options_.subfield_delimiter()); - } - (*it).second.append(s); ->>>>>>> eee098be8f774f64527281d5beb9e90aa9b73e06 } break; } @@ -656,11 +631,11 @@ class basic_csv_encoder final : public basic_json_visitor jsoncons::string_sink> bo(s); write_double_value(val, context, bo, ec); bo.flush(); - if (!(*it).second.empty() && options_.subfield_delimiter() != char_type()) + if (!it->second.empty() && options_.subfield_delimiter() != char_type()) { - (*it).second.push_back(options_.subfield_delimiter()); + it->second.push_back(options_.subfield_delimiter()); } - (*it).second.append(s); + it->second.append(s); } break; } @@ -733,11 +708,11 @@ class basic_csv_encoder final : public basic_json_visitor jsoncons::string_sink> bo(s); write_int64_value(val,bo); bo.flush(); - if (!(*it).second.empty() && options_.subfield_delimiter() != char_type()) + if (!it->second.empty() && options_.subfield_delimiter() != char_type()) { - (*it).second.push_back(options_.subfield_delimiter()); + it->second.push_back(options_.subfield_delimiter()); } - (*it).second.append(s); + it->second.append(s); } break; } @@ -810,11 +785,11 @@ class basic_csv_encoder final : public basic_json_visitor jsoncons::string_sink> bo(s); write_uint64_value(val, bo); bo.flush(); - if (!(*it).second.empty() && options_.subfield_delimiter() != char_type()) + if (!it->second.empty() && options_.subfield_delimiter() != char_type()) { - (*it).second.push_back(options_.subfield_delimiter()); + it->second.push_back(options_.subfield_delimiter()); } - (*it).second.append(s); + it->second.append(s); } break; } @@ -884,11 +859,11 @@ class basic_csv_encoder final : public basic_json_visitor jsoncons::string_sink> bo(s); write_bool_value(val,bo); bo.flush(); - if (!(*it).second.empty() && options_.subfield_delimiter() != char_type()) + if (!it->second.empty() && options_.subfield_delimiter() != char_type()) { - (*it).second.push_back(options_.subfield_delimiter()); + it->second.push_back(options_.subfield_delimiter()); } - (*it).second.append(s); + it->second.append(s); } break; } diff --git a/include/jsoncons_ext/csv/csv_encoder.hpp.orig b/include/jsoncons_ext/csv/csv_encoder.hpp.orig deleted file mode 100644 index 60fc03fe41..0000000000 --- a/include/jsoncons_ext/csv/csv_encoder.hpp.orig +++ /dev/null @@ -1,1145 +0,0 @@ -// Copyright 2013-2025 Daniel Parker -// Distributed under the Boost license, Version 1.0. -// (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) - -// See https://github.com/danielaparker/jsoncons for latest version - -#ifndef JSONCONS_EXT_CSV_CSV_ENCODER_HPP -#define JSONCONS_EXT_CSV_CSV_ENCODER_HPP - -#include // std::array -#include // std::numeric_limits -#include // std::allocator -#include -#include -#include // std::unordered_map -#include // std::move -#include - -#include -#include -#include -#include -#include -#include - -namespace jsoncons { namespace csv { - -template ,typename Allocator=std::allocator> -class basic_csv_encoder final : public basic_json_visitor -{ -public: - using char_type = CharT; - using typename basic_json_visitor::string_view_type; - using sink_type = Sink; - - using allocator_type = Allocator; - using char_allocator_type = typename std::allocator_traits:: template rebind_alloc; - using string_type = std::basic_string, char_allocator_type>; - using string_allocator_type = typename std::allocator_traits:: template rebind_alloc; - using string_string_allocator_type = typename std::allocator_traits:: template rebind_alloc>; - -private: - static jsoncons::basic_string_view null_constant() - { - static jsoncons::basic_string_view k = JSONCONS_STRING_VIEW_CONSTANT(CharT,"null"); - return k; - } - static jsoncons::basic_string_view true_constant() - { - static jsoncons::basic_string_view k = JSONCONS_STRING_VIEW_CONSTANT(CharT,"true"); - return k; - } - static jsoncons::basic_string_view false_constant() - { - static jsoncons::basic_string_view k = JSONCONS_STRING_VIEW_CONSTANT(CharT,"false"); - return k; - } - - enum class stack_item_kind - { - row_mapping, - column_mapping, - object, - row, - column, - object_multi_valued_field, - row_multi_valued_field, - column_multi_valued_field - }; - - struct stack_item - { - stack_item_kind item_kind_; - std::size_t count_; - std::string pathname_; - - stack_item(stack_item_kind item_kind) noexcept - : item_kind_(item_kind), pathname_{}, count_(0) - { - } - - bool is_object() const - { - return item_kind_ == stack_item_kind::object; - } - - stack_item_kind item_kind() const - { - return item_kind_; - } - }; - - Sink sink_; - const basic_csv_encode_options options_; - allocator_type alloc_; - - std::vector stack_; - jsoncons::detail::write_double fp_; - - std::vector column_names_; - std::unordered_map,std::equal_to,string_string_allocator_type> cname_value_map_; - - std::size_t column_index_; - std::vector row_counts_; - string_type buffer_; - - // Noncopyable and nonmoveable - basic_csv_encoder(const basic_csv_encoder&) = delete; - basic_csv_encoder& operator=(const basic_csv_encoder&) = delete; -public: - basic_csv_encoder(Sink&& sink, - const Allocator& alloc = Allocator()) - : basic_csv_encoder(std::forward(sink), basic_csv_encode_options(), alloc) - { - } - - basic_csv_encoder(Sink&& sink, - const basic_csv_encode_options& options, - const Allocator& alloc = Allocator()) - : sink_(std::forward(sink)), - options_(options), - alloc_(alloc), - stack_(), - fp_(options.float_format(), options.precision()), - column_index_(0), - buffer_(alloc) - { - jsoncons::csv::detail::parse_column_names(options.column_names(), column_names_); - } - - ~basic_csv_encoder() noexcept - { - JSONCONS_TRY - { - sink_.flush(); - } - JSONCONS_CATCH(...) - { - } - } - - void reset() - { - stack_.clear(); - column_names_.clear(); - cname_value_map_.clear(); - column_index_ = 0; - row_counts_.clear(); - } - - void reset(Sink&& sink) - { - sink_ = std::move(sink); - reset(); - } - -private: - - template - void escape_string(const CharT* s, - std::size_t length, - CharT quote_char, CharT quote_escape_char, - AnyWriter& sink) - { - const CharT* begin = s; - const CharT* end = s + length; - for (const CharT* it = begin; it != end; ++it) - { - CharT c = *it; - if (c == quote_char) - { - sink.push_back(quote_escape_char); - sink.push_back(quote_char); - } - else - { - sink.push_back(c); - } - } - } - - void visit_flush() override - { - sink_.flush(); - } - - bool visit_begin_object(semantic_tag, const ser_context&, std::error_code& ec) override - { - if (stack_.empty()) - { - stack_.emplace_back(stack_item_kind::column_mapping); - return true; - } - switch (stack_.back().item_kind_) - { - case stack_item_kind::row_mapping: - stack_.emplace_back(stack_item_kind::object); - return true; - case stack_item_kind::object: - stack_.emplace_back(stack_item_kind::object); - return true; - default: // error - ec = csv_errc::source_error; - return false; - } - } - - bool visit_end_object(const ser_context&, std::error_code&) override - { - JSONCONS_ASSERT(!stack_.empty()); - - switch (stack_.back().item_kind_) - { - case stack_item_kind::object: - if (stack_[stack_.size()-2].item_kind_ == stack_item_kind::row_mapping) - { - if (stack_[0].count_ == 0) - { - for (std::size_t i = 0; i < column_names_.size(); ++i) - { - if (i > 0) - { - sink_.push_back(options_.field_delimiter()); - } - sink_.append(column_names_[i].data(), - column_names_[i].length()); - } - sink_.append(options_.line_delimiter().data(), - options_.line_delimiter().length()); - } - for (std::size_t i = 0; i < column_names_.size(); ++i) - { - if (i > 0) - { - sink_.push_back(options_.field_delimiter()); - } - auto it = cname_value_map_.find(column_names_[i]); - if (it != cname_value_map_.end()) - { - sink_.append(it->second.data(),it->second.length()); - it->second.clear(); - } - } - sink_.append(options_.line_delimiter().data(), options_.line_delimiter().length()); - } -<<<<<<< HEAD -======= - for (std::size_t i = 0; i < strings_buffer_.size(); ++i) - { - if (i > 0) - { - sink_.push_back(options_.field_delimiter()); - } - auto it = buffered_line_.find(strings_buffer_[i]); - if (it != buffered_line_.end()) - { - sink_.append((*it).second.data(),(*it).second.length()); - (*it).second.clear(); - } - } - sink_.append(options_.line_delimiter().data(), options_.line_delimiter().length()); ->>>>>>> eee098be8f774f64527281d5beb9e90aa9b73e06 - break; - case stack_item_kind::column_mapping: - { - for (const auto& item : column_names_) - { - sink_.append(item.data(), item.size()); - sink_.append(options_.line_delimiter().data(), options_.line_delimiter().length()); - } - break; - } - default: - break; - } - stack_.pop_back(); - if (!stack_.empty()) - { - end_value(); - } - return true; - } - - bool visit_begin_array(semantic_tag, const ser_context&, std::error_code& ec) override - { - if (stack_.empty()) - { - stack_.emplace_back(stack_item_kind::row_mapping); - return true; - } - - switch (stack_.back().item_kind_) - { - case stack_item_kind::row_mapping: - stack_.emplace_back(stack_item_kind::row); - return true; - case stack_item_kind::object: - stack_.emplace_back(stack_item_kind::object_multi_valued_field); - return true; - case stack_item_kind::column_mapping: - stack_.emplace_back(stack_item_kind::column); - row_counts_.push_back(1); - if (column_names_.size() <= row_counts_.back()) - { - column_names_.emplace_back(); - } - return true; - case stack_item_kind::column: - { - if (column_names_.size() <= row_counts_.back()) - { - column_names_.emplace_back(); - } - jsoncons::string_sink> bo(column_names_[row_counts_.back()]); - begin_value(bo); - stack_.emplace_back(stack_item_kind::column_multi_valued_field); - return true; - } - case stack_item_kind::row: - begin_value(sink_); - //stack_.emplace_back(stack_item_kind::row_multi_valued_field); - stack_.emplace_back(stack_item_kind::row); - return true; - default: // error - ec = csv_errc::source_error; - return false; - } - } - - bool visit_end_array(const ser_context&, std::error_code&) override - { - JSONCONS_ASSERT(!stack_.empty()); - switch (stack_.back().item_kind_) - { - case stack_item_kind::row: - if (stack_[stack_.size()-2].item_kind_ == stack_item_kind::row_mapping) - { - if (stack_[0].count_ == 0) - { - for (std::size_t i = 0; i < column_names_.size(); ++i) - { - if (i > 0) - { - sink_.push_back(options_.field_delimiter()); - } - sink_.append(column_names_[i].data(), column_names_[i].length()); - } - sink_.append(options_.line_delimiter().data(), - options_.line_delimiter().length()); - } - - //std::cout << "visit_end_array: write row column_names: " << column_names_.size() << "\n"; - for (std::size_t i = 0; i < column_names_.size(); ++i) - { - if (i > 0) - { - sink_.push_back(options_.field_delimiter()); - } - auto it = cname_value_map_.find(column_names_[i]); - if (it != cname_value_map_.end()) - { - sink_.append(it->second.data(),it->second.length()); - it->second.clear(); - } - } - sink_.append(options_.line_delimiter().data(), options_.line_delimiter().length()); - } - break; - case stack_item_kind::column: - ++column_index_; - break; - default: - break; - } - stack_.pop_back(); - - if (!stack_.empty()) - { - end_value(); - } - return true; - } - - bool visit_key(const string_view_type& name, const ser_context&, std::error_code&) override - { - JSONCONS_ASSERT(!stack_.empty()); - switch (stack_.back().item_kind_) - { - case stack_item_kind::object: - { - stack_.back().pathname_ = stack_[stack_.size()-2].pathname_; - stack_.back().pathname_.push_back('/'); - stack_.back().pathname_.append(std::string(name)); - break; - } - case stack_item_kind::column_mapping: - { - if (column_names_.empty()) - { - column_names_.emplace_back(name); - } - else - { - column_names_[0].push_back(options_.field_delimiter()); - column_names_[0].append(string_type(name)); - } - break; - } - default: - break; - } - return true; - } - - void append_array_path_component() - { - stack_.back().pathname_ = stack_[stack_.size()-2].pathname_; - stack_.back().pathname_.push_back('/'); - buffer_.clear(); - jsoncons::detail::from_integer(stack_.back().count_, buffer_); - stack_.back().pathname_.append(buffer_); - if (stack_[0].count_ == 0 && options_.column_names().empty()) - { - column_names_.emplace_back(stack_.back().pathname_); - } - } - - bool visit_null(semantic_tag, const ser_context&, std::error_code&) override - { - JSONCONS_ASSERT(!stack_.empty()); - switch (stack_.back().item_kind_) - { - case stack_item_kind::object: - case stack_item_kind::object_multi_valued_field: - { - if (stack_[0].count_ == 0) - { - if (options_.column_names().empty()) - { - column_names_.emplace_back(stack_.back().pathname_); - } - cname_value_map_[stack_.back().pathname_] = std::basic_string(); - } - auto it = cname_value_map_.find(stack_.back().pathname_); - if (it != cname_value_map_.end()) - { - std::basic_string s; - jsoncons::string_sink> bo(s); - write_null_value(bo); - bo.flush(); - if (!(*it).second.empty() && options_.subfield_delimiter() != char_type()) - { - (*it).second.push_back(options_.subfield_delimiter()); - } - (*it).second.append(s); - } - break; - } - case stack_item_kind::row: - { - append_array_path_component(); - if (stack_[0].count_ == 0) - { - cname_value_map_[stack_.back().pathname_] = std::basic_string(); - } - auto it = cname_value_map_.find(stack_.back().pathname_); - if (it != cname_value_map_.end()) - { - std::basic_string s; - jsoncons::string_sink> bo(s); - write_null_value(bo); - bo.flush(); - it->second.append(s); - } - break; - } - case stack_item_kind::row_multi_valued_field: - write_null_value(sink_); - break; - case stack_item_kind::column: - { - if (column_names_.size() <= row_counts_.back()) - { - column_names_.emplace_back(); - } - jsoncons::string_sink> bo(column_names_[row_counts_.back()]); - write_null_value(bo); - break; - } - case stack_item_kind::column_multi_valued_field: - { - jsoncons::string_sink> bo(column_names_[row_counts_.back()]); - write_null_value(bo); - break; - } - default: - break; - } - return true; - } - - bool visit_string(const string_view_type& sv, semantic_tag, const ser_context&, std::error_code&) override - { - JSONCONS_ASSERT(!stack_.empty()); - switch (stack_.back().item_kind_) - { - case stack_item_kind::object: - case stack_item_kind::object_multi_valued_field: - { - if (stack_[0].count_ == 0) - { - if (options_.column_names().empty()) - { - column_names_.emplace_back(stack_.back().pathname_); - } - cname_value_map_[stack_.back().pathname_] = std::basic_string(); - } - auto it = cname_value_map_.find(stack_.back().pathname_); - if (it != cname_value_map_.end()) - { - std::basic_string s; - jsoncons::string_sink> bo(s); - write_string_value(sv,bo); - bo.flush(); -<<<<<<< HEAD - cname_value_map_[stack_.back().pathname_] = s; -======= - if (!(*it).second.empty() && options_.subfield_delimiter() != char_type()) - { - (*it).second.push_back(options_.subfield_delimiter()); - } - (*it).second.append(s); ->>>>>>> eee098be8f774f64527281d5beb9e90aa9b73e06 - } - break; - } - case stack_item_kind::row: - { - append_array_path_component(); - if (stack_[0].count_ == 0) - { - cname_value_map_[stack_.back().pathname_] = std::basic_string(); - } - auto it = cname_value_map_.find(stack_.back().pathname_); - if (it != cname_value_map_.end()) - { - std::basic_string s; - jsoncons::string_sink> bo(s); - write_string_value(sv,bo); - bo.flush(); - it->second.append(s); - } - break; - } - case stack_item_kind::row_multi_valued_field: - write_string_value(sv,sink_); - break; - case stack_item_kind::column: - { - if (column_names_.size() <= row_counts_.back()) - { - column_names_.emplace_back(); - } - jsoncons::string_sink> bo(column_names_[row_counts_.back()]); - write_string_value(sv,bo); - break; - } - case stack_item_kind::column_multi_valued_field: - { - jsoncons::string_sink> bo(column_names_[row_counts_.back()]); - write_string_value(sv,bo); - break; - } - default: - break; - } - return true; - } - - bool visit_byte_string(const byte_string_view& b, - semantic_tag tag, - const ser_context& context, - std::error_code& ec) override - { - byte_string_chars_format encoding_hint; - switch (tag) - { - case semantic_tag::base16: - encoding_hint = byte_string_chars_format::base16; - break; - case semantic_tag::base64: - encoding_hint = byte_string_chars_format::base64; - break; - case semantic_tag::base64url: - encoding_hint = byte_string_chars_format::base64url; - break; - default: - encoding_hint = byte_string_chars_format::none; - break; - } - byte_string_chars_format format = jsoncons::detail::resolve_byte_string_chars_format(encoding_hint,byte_string_chars_format::none,byte_string_chars_format::base64url); - - std::basic_string s; - switch (format) - { - case byte_string_chars_format::base16: - { - encode_base16(b.begin(),b.end(),s); - visit_string(s, semantic_tag::none, context, ec); - break; - } - case byte_string_chars_format::base64: - { - encode_base64(b.begin(),b.end(),s); - visit_string(s, semantic_tag::none, context, ec); - break; - } - case byte_string_chars_format::base64url: - { - encode_base64url(b.begin(),b.end(),s); - visit_string(s, semantic_tag::none, context, ec); - break; - } - default: - { - JSONCONS_UNREACHABLE(); - } - } - - return true; - } - - bool visit_double(double val, - semantic_tag, - const ser_context& context, - std::error_code& ec) override - { - JSONCONS_ASSERT(!stack_.empty()); - switch (stack_.back().item_kind_) - { - case stack_item_kind::object: - case stack_item_kind::object_multi_valued_field: - { - if (stack_[0].count_ == 0) - { - if (options_.column_names().empty()) - { - column_names_.emplace_back(stack_.back().pathname_); - } - cname_value_map_[stack_.back().pathname_] = std::basic_string(); - } - auto it = cname_value_map_.find(stack_.back().pathname_); - if (it != cname_value_map_.end()) - { - std::basic_string s; - jsoncons::string_sink> bo(s); - write_double_value(val, context, bo, ec); - bo.flush(); - if (!(*it).second.empty() && options_.subfield_delimiter() != char_type()) - { - (*it).second.push_back(options_.subfield_delimiter()); - } - (*it).second.append(s); - } - break; - } - case stack_item_kind::row: - { - append_array_path_component(); - if (stack_[0].count_ == 0) - { - cname_value_map_[stack_.back().pathname_] = std::basic_string(); - } - auto it = cname_value_map_.find(stack_.back().pathname_); - if (it != cname_value_map_.end()) - { - std::basic_string s; - jsoncons::string_sink> bo(s); - write_double_value(val, context, bo, ec); - bo.flush(); - it->second.append(s); - } - break; - } - case stack_item_kind::row_multi_valued_field: - write_double_value(val, context, sink_, ec); - break; - case stack_item_kind::column: - { - if (column_names_.size() <= row_counts_.back()) - { - column_names_.emplace_back(); - } - jsoncons::string_sink> bo(column_names_[row_counts_.back()]); - write_double_value(val, context, bo, ec); - break; - } - case stack_item_kind::column_multi_valued_field: - { - jsoncons::string_sink> bo(column_names_[row_counts_.back()]); - write_double_value(val, context, bo, ec); - break; - } - default: - break; - } - return true; - } - - bool visit_int64(int64_t val, - semantic_tag, - const ser_context&, - std::error_code&) override - { - JSONCONS_ASSERT(!stack_.empty()); - switch (stack_.back().item_kind_) - { - case stack_item_kind::object: - case stack_item_kind::object_multi_valued_field: - { - if (stack_[0].count_ == 0) - { - if (options_.column_names().empty()) - { - column_names_.emplace_back(stack_.back().pathname_); - } - cname_value_map_[stack_.back().pathname_] = std::basic_string(); - } - auto it = cname_value_map_.find(stack_.back().pathname_); - if (it != cname_value_map_.end()) - { - std::basic_string s; - jsoncons::string_sink> bo(s); - write_int64_value(val,bo); - bo.flush(); - if (!(*it).second.empty() && options_.subfield_delimiter() != char_type()) - { - (*it).second.push_back(options_.subfield_delimiter()); - } - (*it).second.append(s); - } - break; - } - case stack_item_kind::row: - { - append_array_path_component(); - if (stack_[0].count_ == 0) - { - cname_value_map_[stack_.back().pathname_] = std::basic_string(); - } - auto it = cname_value_map_.find(stack_.back().pathname_); - if (it != cname_value_map_.end()) - { - std::basic_string s; - jsoncons::string_sink> bo(s); - write_int64_value(val,bo); - bo.flush(); - it->second.append(s); - } - break; - } - case stack_item_kind::row_multi_valued_field: - write_int64_value(val,sink_); - break; - case stack_item_kind::column: - { - if (column_names_.size() <= row_counts_.back()) - { - column_names_.emplace_back(); - } - jsoncons::string_sink> bo(column_names_[row_counts_.back()]); - write_int64_value(val, bo); - break; - } - case stack_item_kind::column_multi_valued_field: - { - jsoncons::string_sink> bo(column_names_[row_counts_.back()]); - write_int64_value(val, bo); - break; - } - default: - break; - } - return true; - } - - bool visit_uint64(uint64_t val, - semantic_tag, - const ser_context&, - std::error_code&) override - { - JSONCONS_ASSERT(!stack_.empty()); - switch (stack_.back().item_kind_) - { - case stack_item_kind::object: - case stack_item_kind::object_multi_valued_field: - { - if (stack_[0].count_ == 0) - { - if (options_.column_names().empty()) - { - column_names_.emplace_back(stack_.back().pathname_); - } - cname_value_map_[stack_.back().pathname_] = std::basic_string(); - } - auto it = cname_value_map_.find(stack_.back().pathname_); - if (it != cname_value_map_.end()) - { - std::basic_string s; - jsoncons::string_sink> bo(s); - write_uint64_value(val, bo); - bo.flush(); - if (!(*it).second.empty() && options_.subfield_delimiter() != char_type()) - { - (*it).second.push_back(options_.subfield_delimiter()); - } - (*it).second.append(s); - } - break; - } - case stack_item_kind::row: - { - append_array_path_component(); - if (stack_[0].count_ == 0) - { - cname_value_map_[stack_.back().pathname_] = std::basic_string(); - } - auto it = cname_value_map_.find(stack_.back().pathname_); - if (it != cname_value_map_.end()) - { - std::basic_string s; - jsoncons::string_sink> bo(s); - write_uint64_value(val, bo); - bo.flush(); - it->second.append(s); - } - break; - } - case stack_item_kind::row_multi_valued_field: - write_uint64_value(val,sink_); - break; - case stack_item_kind::column: - { - if (column_names_.size() <= row_counts_.back()) - { - column_names_.emplace_back(); - } - jsoncons::string_sink> bo(column_names_[row_counts_.back()]); - write_uint64_value(val, bo); - break; - } - case stack_item_kind::column_multi_valued_field: - { - jsoncons::string_sink> bo(column_names_[row_counts_.back()]); - write_uint64_value(val, bo); - break; - } - default: - break; - } - return true; - } - - bool visit_bool(bool val, semantic_tag, const ser_context&, std::error_code&) override - { - JSONCONS_ASSERT(!stack_.empty()); - switch (stack_.back().item_kind_) - { - case stack_item_kind::object: - case stack_item_kind::object_multi_valued_field: - { - if (stack_[0].count_ == 0) - { - if (options_.column_names().empty()) - { - column_names_.emplace_back(stack_.back().pathname_); - } - cname_value_map_[stack_.back().pathname_] = std::basic_string(); - } - auto it = cname_value_map_.find(stack_.back().pathname_); - if (it != cname_value_map_.end()) - { - std::basic_string s; - jsoncons::string_sink> bo(s); - write_bool_value(val,bo); - bo.flush(); - if (!(*it).second.empty() && options_.subfield_delimiter() != char_type()) - { - (*it).second.push_back(options_.subfield_delimiter()); - } - (*it).second.append(s); - } - break; - } - case stack_item_kind::row: - { - append_array_path_component(); - if (stack_[0].count_ == 0) - { - cname_value_map_[stack_.back().pathname_] = std::basic_string(); - } - auto it = cname_value_map_.find(stack_.back().pathname_); - if (it != cname_value_map_.end()) - { - std::basic_string s; - jsoncons::string_sink> bo(s); - write_bool_value(val,bo); - bo.flush(); - it->second.append(s); - } - break; - } - case stack_item_kind::row_multi_valued_field: - write_bool_value(val,sink_); - break; - case stack_item_kind::column: - { - if (column_names_.size() <= row_counts_.back()) - { - column_names_.emplace_back(); - } - jsoncons::string_sink> bo(column_names_[row_counts_.back()]); - write_bool_value(val, bo); - break; - } - case stack_item_kind::column_multi_valued_field: - { - jsoncons::string_sink> bo(column_names_[row_counts_.back()]); - write_bool_value(val, bo); - break; - } - default: - break; - } - return true; - } - - template - bool do_string_value(const CharT* s, std::size_t length, AnyWriter& sink) - { - bool quote = false; - if (options_.quote_style() == quote_style_kind::all || options_.quote_style() == quote_style_kind::nonnumeric || - (options_.quote_style() == quote_style_kind::minimal && - (std::char_traits::find(s, length, options_.field_delimiter()) != nullptr || std::char_traits::find(s, length, options_.quote_char()) != nullptr))) - { - quote = true; - sink.push_back(options_.quote_char()); - } - escape_string(s, length, options_.quote_char(), options_.quote_escape_char(), sink); - if (quote) - { - sink.push_back(options_.quote_char()); - } - - return true; - } - - template - void write_string_value(const string_view_type& value, AnyWriter& sink) - { - begin_value(sink); - do_string_value(value.data(),value.length(),sink); - end_value(); - } - - template - void write_double_value(double val, const ser_context& context, AnyWriter& sink, std::error_code& ec) - { - begin_value(sink); - - if (!std::isfinite(val)) - { - if ((std::isnan)(val)) - { - if (options_.enable_nan_to_num()) - { - sink.append(options_.nan_to_num().data(), options_.nan_to_num().length()); - } - else if (options_.enable_nan_to_str()) - { - visit_string(options_.nan_to_str(), semantic_tag::none, context, ec); - } - else - { - sink.append(null_constant().data(), null_constant().size()); - } - } - else if (val == std::numeric_limits::infinity()) - { - if (options_.enable_inf_to_num()) - { - sink.append(options_.inf_to_num().data(), options_.inf_to_num().length()); - } - else if (options_.enable_inf_to_str()) - { - visit_string(options_.inf_to_str(), semantic_tag::none, context, ec); - } - else - { - sink.append(null_constant().data(), null_constant().size()); - } - } - else - { - if (options_.enable_neginf_to_num()) - { - sink.append(options_.neginf_to_num().data(), options_.neginf_to_num().length()); - } - else if (options_.enable_neginf_to_str()) - { - visit_string(options_.neginf_to_str(), semantic_tag::none, context, ec); - } - else - { - sink.append(null_constant().data(), null_constant().size()); - } - } - } - else - { - fp_(val, sink); - } - - end_value(); - - } - - template - void write_int64_value(int64_t val, AnyWriter& sink) - { - begin_value(sink); - - jsoncons::detail::from_integer(val,sink); - - end_value(); - } - - template - void write_uint64_value(uint64_t val, AnyWriter& sink) - { - begin_value(sink); - - jsoncons::detail::from_integer(val,sink); - - end_value(); - } - - template - void write_bool_value(bool val, AnyWriter& sink) - { - begin_value(sink); - - if (val) - { - sink.append(true_constant().data(), true_constant().size()); - } - else - { - sink.append(false_constant().data(), false_constant().size()); - } - - end_value(); - } - - template - bool write_null_value(AnyWriter& sink) - { - begin_value(sink); - sink.append(null_constant().data(), null_constant().size()); - end_value(); - return true; - } - - template - void begin_value(AnyWriter& sink) - { - JSONCONS_ASSERT(!stack_.empty()); - switch (stack_.back().item_kind_) - { - case stack_item_kind::row: - break; - case stack_item_kind::column: - { - if (row_counts_.size() >= 3) - { - for (std::size_t i = row_counts_.size()-2; i-- > 0;) - { - if (row_counts_[i] <= row_counts_.back()) - { - sink.push_back(options_.field_delimiter()); - } - else - { - break; - } - } - } - if (column_index_ > 0) - { - sink.push_back(options_.field_delimiter()); - } - break; - } - case stack_item_kind::row_multi_valued_field: - case stack_item_kind::column_multi_valued_field: - if (stack_.back().count_ > 0 && options_.subfield_delimiter() != char_type()) - { - sink.push_back(options_.subfield_delimiter()); - } - break; - default: - break; - } - } - - void end_value() - { - JSONCONS_ASSERT(!stack_.empty()); - switch(stack_.back().item_kind_) - { - case stack_item_kind::row: - { - ++stack_.back().count_; - break; - } - case stack_item_kind::column: - { - ++row_counts_.back(); - break; - } - default: - ++stack_.back().count_; - break; - } - } -}; - -using csv_stream_encoder = basic_csv_encoder; -using csv_string_encoder = basic_csv_encoder>; -using csv_wstream_encoder = basic_csv_encoder; -using wcsv_string_encoder = basic_csv_encoder>; - -}} - -#endif From 7b2ca1ec6747043a57e56bc88078c342c2d49ddc Mon Sep 17 00:00:00 2001 From: Daniel Parker Date: Fri, 10 Jan 2025 11:45:48 -0500 Subject: [PATCH 11/79] csv_options is_flat --- include/jsoncons_ext/csv/csv_options.hpp | 54 ++++++++++++------------ 1 file changed, 27 insertions(+), 27 deletions(-) diff --git a/include/jsoncons_ext/csv/csv_options.hpp b/include/jsoncons_ext/csv/csv_options.hpp index 1a9c36d074..55c0e7e398 100644 --- a/include/jsoncons_ext/csv/csv_options.hpp +++ b/include/jsoncons_ext/csv/csv_options.hpp @@ -270,20 +270,21 @@ class basic_csv_options_common using char_type = CharT; using string_type = std::basic_string; private: - char_type field_delimiter_; - char_type quote_char_; - char_type quote_escape_char_; - char_type subfield_delimiter_; - - bool enable_nan_to_num_:1; - bool enable_inf_to_num_:1; - bool enable_neginf_to_num_:1; - bool enable_nan_to_str_:1; - bool enable_inf_to_str_:1; - bool enable_neginf_to_str_:1; - bool enable_str_to_nan_:1; - bool enable_str_to_inf_:1; - bool enable_str_to_neginf_:1; + bool is_flat_{true}; + char_type field_delimiter_{','}; + char_type quote_char_{'\"'}; + char_type quote_escape_char_{'\"'}; + char_type subfield_delimiter_{char_type{}}; + + bool enable_nan_to_num_:1{false}; + bool enable_inf_to_num_:1{false}; + bool enable_neginf_to_num_:1{false}; + bool enable_nan_to_str_:1{false}; + bool enable_inf_to_str_:1{false}; + bool enable_neginf_to_str_:1{false}; + bool enable_str_to_nan_:1{false}; + bool enable_str_to_inf_:1{false}; + bool enable_str_to_neginf_:1{false}; string_type nan_to_num_; string_type inf_to_num_; @@ -295,19 +296,6 @@ class basic_csv_options_common protected: basic_csv_options_common() - : field_delimiter_(','), - quote_char_('\"'), - quote_escape_char_('\"'), - subfield_delimiter_(char_type()), - enable_nan_to_num_(false), - enable_inf_to_num_(false), - enable_neginf_to_num_(false), - enable_nan_to_str_(false), - enable_inf_to_str_(false), - enable_neginf_to_str_(false), - enable_str_to_nan_(false), - enable_str_to_inf_(false), - enable_str_to_neginf_(false) { } @@ -318,6 +306,11 @@ class basic_csv_options_common virtual ~basic_csv_options_common() = default; public: + bool is_flat() const + { + return is_flat_; + } + char_type field_delimiter() const { return field_delimiter_; @@ -682,6 +675,7 @@ class basic_csv_options final : public basic_csv_decode_options, public b using basic_csv_decode_options::nan_to_num; using basic_csv_decode_options::inf_to_num; using basic_csv_decode_options::neginf_to_num; + using basic_csv_decode_options::is_flat; using basic_csv_decode_options::field_delimiter; using basic_csv_decode_options::subfield_delimiter; using basic_csv_decode_options::quote_char; @@ -818,6 +812,12 @@ class basic_csv_options final : public basic_csv_decode_options, public b return *this; } + basic_csv_options& is_flat(bool value) + { + this->is_flat_ = value; + return *this; + } + basic_csv_options& field_delimiter(char_type value) { this->field_delimiter_ = value; From caa9c5dd52d2535b263c1532f366da825f5b6ca2 Mon Sep 17 00:00:00 2001 From: Daniel Parker Date: Fri, 10 Jan 2025 13:16:58 -0500 Subject: [PATCH 12/79] csv is_flat -> flat --- include/jsoncons_ext/csv/csv_options.hpp | 12 +-- test/csv/src/csv_encoder_tests.cpp | 125 +++++++++++++++++++++-- 2 files changed, 124 insertions(+), 13 deletions(-) diff --git a/include/jsoncons_ext/csv/csv_options.hpp b/include/jsoncons_ext/csv/csv_options.hpp index 55c0e7e398..152567beea 100644 --- a/include/jsoncons_ext/csv/csv_options.hpp +++ b/include/jsoncons_ext/csv/csv_options.hpp @@ -270,12 +270,12 @@ class basic_csv_options_common using char_type = CharT; using string_type = std::basic_string; private: - bool is_flat_{true}; char_type field_delimiter_{','}; char_type quote_char_{'\"'}; char_type quote_escape_char_{'\"'}; char_type subfield_delimiter_{char_type{}}; + bool flat_:1{true}; bool enable_nan_to_num_:1{false}; bool enable_inf_to_num_:1{false}; bool enable_neginf_to_num_:1{false}; @@ -306,9 +306,9 @@ class basic_csv_options_common virtual ~basic_csv_options_common() = default; public: - bool is_flat() const + bool flat() const { - return is_flat_; + return flat_; } char_type field_delimiter() const @@ -675,7 +675,7 @@ class basic_csv_options final : public basic_csv_decode_options, public b using basic_csv_decode_options::nan_to_num; using basic_csv_decode_options::inf_to_num; using basic_csv_decode_options::neginf_to_num; - using basic_csv_decode_options::is_flat; + using basic_csv_decode_options::flat; using basic_csv_decode_options::field_delimiter; using basic_csv_decode_options::subfield_delimiter; using basic_csv_decode_options::quote_char; @@ -812,9 +812,9 @@ class basic_csv_options final : public basic_csv_decode_options, public b return *this; } - basic_csv_options& is_flat(bool value) + basic_csv_options& flat(bool value) { - this->is_flat_ = value; + this->flat_ = value; return *this; } diff --git a/test/csv/src/csv_encoder_tests.cpp b/test/csv/src/csv_encoder_tests.cpp index 48652530a7..0a6c2f8d35 100644 --- a/test/csv/src/csv_encoder_tests.cpp +++ b/test/csv/src/csv_encoder_tests.cpp @@ -7,9 +7,112 @@ namespace csv = jsoncons::csv; -TEST_CASE("test csv to json") +TEST_CASE("test json to flat csv") { - /*SECTION("array of objects to csv") + SECTION("array of objects to csv") + { + std::string expected = R"(boolean,datetime,float,text +true,1971-01-01T04:14:00,1.0,Chicago Reader +true,1948-01-01T14:57:13,1.27,Chicago Sun-Times +)"; + + std::string jtext = R"( +[ + { + "text": "Chicago Reader", + "float": 1.0, + "datetime": "1971-01-01T04:14:00", + "boolean": true, + "nested": { + "time": "04:14:00", + "nested": { + "date": "1971-01-01", + "integer": 40 + } + } + }, + { + "text": "Chicago Sun-Times", + "float": 1.27, + "datetime": "1948-01-01T14:57:13", + "boolean": true, + "nested": { + "time": "14:57:13", + "nested": { + "date": "1948-01-01", + "integer": 63 + } + } + } +] + )"; + + + auto j = jsoncons::json::parse(jtext); + //std::cout << pretty_print(j) << "\n"; + + std::string buf; + csv::csv_string_encoder encoder(buf); + j.dump(encoder); + + std::cout << buf << "\n"; + + //CHECK(expected == buf); + } +#if 0 + SECTION("array of arrays to csv") + { + std::string expected = R"(/0,/1,/2,/3,/3/0,/3/0/0,/3/0/1 +Chicago Reader,1.0,1971-01-01T04:14:00,true,04:14:00,1971-01-01,40 +Chicago Sun-Times,1.27,1948-01-01T14:57:13,true,14:57:13,1948-01-01,63 +)"; + + std::string jtext = R"( +[ + [ + "Chicago Reader", + 1.0, + "1971-01-01T04:14:00", + true, + [ + "04:14:00", + [ + "1971-01-01", + 40 + ] + ] + ], + [ + "Chicago Sun-Times", + 1.27, + "1948-01-01T14:57:13", + true, + [ + "14:57:13", + [ + "1948-01-01", + 63 + ] + ] + ] +] + )"; + + auto j = jsoncons::json::parse(jtext); + //std::cout << pretty_print(j) << "\n"; + + std::string buf; + csv::csv_string_encoder encoder(buf); + j.dump(encoder); + + CHECK(expected == buf); + } +#endif +} + +TEST_CASE("test json to non-flat csv") +{ + SECTION("array of objects to csv") { std::string expected = R"(/boolean,/datetime,/float,/nested/nested/date,/nested/nested/integer,/nested/time,/text true,1971-01-01T04:14:00,1.0,1971-01-01,40,04:14:00,Chicago Reader @@ -48,15 +151,19 @@ true,1948-01-01T14:57:13,1.27,1948-01-01,63,14:57:13,Chicago Sun-Times )"; auto j = jsoncons::json::parse(jtext); - std::cout << pretty_print(j) << "\n"; + //std::cout << pretty_print(j) << "\n"; + + auto options = csv::csv_options{} + .flat(false); std::string buf; - csv::csv_string_encoder encoder(buf); + csv::csv_string_encoder encoder(buf, options); j.dump(encoder); CHECK(expected == buf); - }*/ + } +#if 0 SECTION("array of arrays to csv") { std::string expected = R"(/0,/1,/2,/3,/3/0,/3/0/0,/3/0/1 @@ -96,13 +203,17 @@ Chicago Sun-Times,1.27,1948-01-01T14:57:13,true,14:57:13,1948-01-01,63 )"; auto j = jsoncons::json::parse(jtext); - std::cout << pretty_print(j) << "\n"; + //std::cout << pretty_print(j) << "\n"; + + auto options = csv::csv_options{} + .flat(false); std::string buf; - csv::csv_string_encoder encoder(buf); + csv::csv_string_encoder encoder(buf, options); j.dump(encoder); CHECK(expected == buf); } +#endif } From 657c609e0c208b4a00d5eb238d1a6ee3428ff997 Mon Sep 17 00:00:00 2001 From: Daniel Parker Date: Sun, 12 Jan 2025 10:39:51 -0500 Subject: [PATCH 13/79] csv flat mapping --- include/jsoncons_ext/csv/csv_encoder.hpp | 66 ++++++++++++++++++++---- test/csv/src/csv_encoder_tests.cpp | 2 +- 2 files changed, 57 insertions(+), 11 deletions(-) diff --git a/include/jsoncons_ext/csv/csv_encoder.hpp b/include/jsoncons_ext/csv/csv_encoder.hpp index 70e3e9c6ba..d622ef1d32 100644 --- a/include/jsoncons_ext/csv/csv_encoder.hpp +++ b/include/jsoncons_ext/csv/csv_encoder.hpp @@ -58,8 +58,11 @@ class basic_csv_encoder final : public basic_json_visitor enum class stack_item_kind { + flat_row_mapping, row_mapping, column_mapping, + flat_object, + unmapped, object, row, column, @@ -81,7 +84,7 @@ class basic_csv_encoder final : public basic_json_visitor bool is_object() const { - return item_kind_ == stack_item_kind::object; + return item_kind_ == stack_item_kind::object || stack_item_kind::flat_object; } stack_item_kind item_kind() const @@ -184,7 +187,7 @@ class basic_csv_encoder final : public basic_json_visitor sink_.flush(); } - bool visit_begin_object(semantic_tag, const ser_context&, std::error_code& ec) override + bool visit_begin_object(semantic_tag, const ser_context&, std::error_code& /*ec*/) override { if (stack_.empty()) { @@ -193,26 +196,38 @@ class basic_csv_encoder final : public basic_json_visitor } switch (stack_.back().item_kind_) { + case stack_item_kind::flat_row_mapping: + stack_.emplace_back(stack_item_kind::flat_object); + return true; case stack_item_kind::row_mapping: stack_.emplace_back(stack_item_kind::object); return true; case stack_item_kind::object: stack_.emplace_back(stack_item_kind::object); return true; + case stack_item_kind::unmapped: + case stack_item_kind::flat_object: + stack_.emplace_back(stack_item_kind::unmapped); + return true; default: // error - ec = csv_errc::source_error; - return false; + //ec = csv_errc::source_error; + //return false; + return true; } } bool visit_end_object(const ser_context&, std::error_code&) override { - JSONCONS_ASSERT(!stack_.empty()); + if (stack_.empty()) + { + return true; + } switch (stack_.back().item_kind_) { + case stack_item_kind::flat_object: case stack_item_kind::object: - if (stack_[stack_.size()-2].item_kind_ == stack_item_kind::row_mapping) + if (stack_[stack_.size()-2].item_kind_ == stack_item_kind::row_mapping || stack_[stack_.size()-2].item_kind_ == stack_item_kind::flat_row_mapping) { if (stack_[0].count_ == 0) { @@ -268,7 +283,14 @@ class basic_csv_encoder final : public basic_json_visitor { if (stack_.empty()) { - stack_.emplace_back(stack_item_kind::row_mapping); + if (options_.flat()) + { + stack_.emplace_back(stack_item_kind::flat_row_mapping); + } + else + { + stack_.emplace_back(stack_item_kind::row_mapping); + } return true; } @@ -312,7 +334,10 @@ class basic_csv_encoder final : public basic_json_visitor bool visit_end_array(const ser_context&, std::error_code&) override { - JSONCONS_ASSERT(!stack_.empty()); + if (stack_.empty()) + { + return true; + } switch (stack_.back().item_kind_) { case stack_item_kind::row: @@ -369,6 +394,13 @@ class basic_csv_encoder final : public basic_json_visitor JSONCONS_ASSERT(!stack_.empty()); switch (stack_.back().item_kind_) { + case stack_item_kind::flat_object: + { + stack_.back().pathname_ = stack_[stack_.size()-2].pathname_; + stack_.back().pathname_.push_back('/'); + stack_.back().pathname_.append(std::string(name)); + break; + } case stack_item_kind::object: { stack_.back().pathname_ = stack_[stack_.size()-2].pathname_; @@ -413,6 +445,7 @@ class basic_csv_encoder final : public basic_json_visitor JSONCONS_ASSERT(!stack_.empty()); switch (stack_.back().item_kind_) { + case stack_item_kind::flat_object: case stack_item_kind::object: case stack_item_kind::object_multi_valued_field: { @@ -487,6 +520,7 @@ class basic_csv_encoder final : public basic_json_visitor JSONCONS_ASSERT(!stack_.empty()); switch (stack_.back().item_kind_) { + case stack_item_kind::flat_object: case stack_item_kind::object: case stack_item_kind::object_multi_valued_field: { @@ -557,6 +591,8 @@ class basic_csv_encoder final : public basic_json_visitor const ser_context& context, std::error_code& ec) override { + JSONCONS_ASSERT(!stack_.empty()); + byte_string_chars_format encoding_hint; switch (tag) { @@ -613,6 +649,7 @@ class basic_csv_encoder final : public basic_json_visitor JSONCONS_ASSERT(!stack_.empty()); switch (stack_.back().item_kind_) { + case stack_item_kind::flat_object: case stack_item_kind::object: case stack_item_kind::object_multi_valued_field: { @@ -690,6 +727,7 @@ class basic_csv_encoder final : public basic_json_visitor JSONCONS_ASSERT(!stack_.empty()); switch (stack_.back().item_kind_) { + case stack_item_kind::flat_object: case stack_item_kind::object: case stack_item_kind::object_multi_valued_field: { @@ -767,6 +805,7 @@ class basic_csv_encoder final : public basic_json_visitor JSONCONS_ASSERT(!stack_.empty()); switch (stack_.back().item_kind_) { + case stack_item_kind::flat_object: case stack_item_kind::object: case stack_item_kind::object_multi_valued_field: { @@ -841,6 +880,7 @@ class basic_csv_encoder final : public basic_json_visitor JSONCONS_ASSERT(!stack_.empty()); switch (stack_.back().item_kind_) { + case stack_item_kind::flat_object: case stack_item_kind::object: case stack_item_kind::object_multi_valued_field: { @@ -1049,7 +1089,10 @@ class basic_csv_encoder final : public basic_json_visitor template void begin_value(AnyWriter& sink) { - JSONCONS_ASSERT(!stack_.empty()); + if (stack_.empty()) + { + return; + } switch (stack_.back().item_kind_) { case stack_item_kind::row: @@ -1090,7 +1133,10 @@ class basic_csv_encoder final : public basic_json_visitor void end_value() { - JSONCONS_ASSERT(!stack_.empty()); + if (stack_.empty()) + { + return; + } switch(stack_.back().item_kind_) { case stack_item_kind::row: diff --git a/test/csv/src/csv_encoder_tests.cpp b/test/csv/src/csv_encoder_tests.cpp index 0a6c2f8d35..e2b584064d 100644 --- a/test/csv/src/csv_encoder_tests.cpp +++ b/test/csv/src/csv_encoder_tests.cpp @@ -13,7 +13,7 @@ TEST_CASE("test json to flat csv") { std::string expected = R"(boolean,datetime,float,text true,1971-01-01T04:14:00,1.0,Chicago Reader -true,1948-01-01T14:57:13,1.27,Chicago Sun-Times +#true,1948-01-01T14:57:13,1.27,Chicago Sun-Times )"; std::string jtext = R"( From 4cda123cf86ac95f8e778dab8fcef81740009512 Mon Sep 17 00:00:00 2001 From: Daniel Parker Date: Sun, 12 Jan 2025 10:49:20 -0500 Subject: [PATCH 14/79] csv flat mapping --- include/jsoncons_ext/csv/csv_encoder.hpp | 7 ++++--- test/csv/src/csv_encoder_tests.cpp | 10 +++++----- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/include/jsoncons_ext/csv/csv_encoder.hpp b/include/jsoncons_ext/csv/csv_encoder.hpp index d622ef1d32..24de1c6729 100644 --- a/include/jsoncons_ext/csv/csv_encoder.hpp +++ b/include/jsoncons_ext/csv/csv_encoder.hpp @@ -396,9 +396,10 @@ class basic_csv_encoder final : public basic_json_visitor { case stack_item_kind::flat_object: { - stack_.back().pathname_ = stack_[stack_.size()-2].pathname_; - stack_.back().pathname_.push_back('/'); - stack_.back().pathname_.append(std::string(name)); + //stack_.back().pathname_ = stack_[stack_.size()-2].pathname_; + //stack_.back().pathname_.push_back('/'); + //stack_.back().pathname_.append(std::string(name)); + stack_.back().pathname_ = std::string(name); break; } case stack_item_kind::object: diff --git a/test/csv/src/csv_encoder_tests.cpp b/test/csv/src/csv_encoder_tests.cpp index e2b584064d..4ca4a9bb4f 100644 --- a/test/csv/src/csv_encoder_tests.cpp +++ b/test/csv/src/csv_encoder_tests.cpp @@ -9,11 +9,12 @@ namespace csv = jsoncons::csv; TEST_CASE("test json to flat csv") { +#if 0 SECTION("array of objects to csv") { std::string expected = R"(boolean,datetime,float,text true,1971-01-01T04:14:00,1.0,Chicago Reader -#true,1948-01-01T14:57:13,1.27,Chicago Sun-Times +true,1948-01-01T14:57:13,1.27,Chicago Sun-Times )"; std::string jtext = R"( @@ -57,9 +58,9 @@ true,1971-01-01T04:14:00,1.0,Chicago Reader std::cout << buf << "\n"; - //CHECK(expected == buf); + CHECK(expected == buf); } -#if 0 +#endif SECTION("array of arrays to csv") { std::string expected = R"(/0,/1,/2,/3,/3/0,/3/0/0,/3/0/1 @@ -106,8 +107,7 @@ Chicago Sun-Times,1.27,1948-01-01T14:57:13,true,14:57:13,1948-01-01,63 j.dump(encoder); CHECK(expected == buf); - } -#endif + } } TEST_CASE("test json to non-flat csv") From 5853ffc5fee1b6651a4425ebef54c4db01b1f0a8 Mon Sep 17 00:00:00 2001 From: Daniel Parker Date: Sun, 12 Jan 2025 17:07:36 -0500 Subject: [PATCH 15/79] csv json array flat mapping --- include/jsoncons_ext/csv/csv_encoder.hpp | 65 +++++++++++++++++++++--- test/csv/src/csv_encoder_tests.cpp | 12 ++--- 2 files changed, 63 insertions(+), 14 deletions(-) diff --git a/include/jsoncons_ext/csv/csv_encoder.hpp b/include/jsoncons_ext/csv/csv_encoder.hpp index 24de1c6729..bab4ea0062 100644 --- a/include/jsoncons_ext/csv/csv_encoder.hpp +++ b/include/jsoncons_ext/csv/csv_encoder.hpp @@ -62,6 +62,7 @@ class basic_csv_encoder final : public basic_json_visitor row_mapping, column_mapping, flat_object, + flat_row, unmapped, object, row, @@ -296,12 +297,15 @@ class basic_csv_encoder final : public basic_json_visitor switch (stack_.back().item_kind_) { + case stack_item_kind::flat_row_mapping: + stack_.emplace_back(stack_item_kind::flat_row); + break; case stack_item_kind::row_mapping: stack_.emplace_back(stack_item_kind::row); - return true; + break; case stack_item_kind::object: stack_.emplace_back(stack_item_kind::object_multi_valued_field); - return true; + break; case stack_item_kind::column_mapping: stack_.emplace_back(stack_item_kind::column); row_counts_.push_back(1); @@ -309,7 +313,7 @@ class basic_csv_encoder final : public basic_json_visitor { column_names_.emplace_back(); } - return true; + break; case stack_item_kind::column: { if (column_names_.size() <= row_counts_.back()) @@ -319,17 +323,25 @@ class basic_csv_encoder final : public basic_json_visitor jsoncons::string_sink> bo(column_names_[row_counts_.back()]); begin_value(bo); stack_.emplace_back(stack_item_kind::column_multi_valued_field); - return true; + break; } case stack_item_kind::row: begin_value(sink_); //stack_.emplace_back(stack_item_kind::row_multi_valued_field); stack_.emplace_back(stack_item_kind::row); - return true; + break; + case stack_item_kind::flat_row: + begin_value(sink_); + stack_.emplace_back(stack_item_kind::unmapped); + break; + case stack_item_kind::unmapped: + stack_.emplace_back(stack_item_kind::unmapped); + break; default: // error ec = csv_errc::source_error; return false; } + return true; } bool visit_end_array(const ser_context&, std::error_code&) override @@ -338,8 +350,42 @@ class basic_csv_encoder final : public basic_json_visitor { return true; } + switch (stack_.back().item_kind_) { + case stack_item_kind::flat_row: + if (stack_[stack_.size()-2].item_kind_ == stack_item_kind::flat_row_mapping) + { + if (stack_[0].count_ == 0 && !options_.column_names().empty()) + { + for (std::size_t i = 0; i < column_names_.size(); ++i) + { + if (i > 0) + { + sink_.push_back(options_.field_delimiter()); + } + sink_.append(column_names_[i].data(), column_names_[i].length()); + } + sink_.append(options_.line_delimiter().data(), + options_.line_delimiter().length()); + } + + for (std::size_t i = 0; i < column_names_.size(); ++i) + { + if (i > 0) + { + sink_.push_back(options_.field_delimiter()); + } + auto it = cname_value_map_.find(column_names_[i]); + if (it != cname_value_map_.end()) + { + sink_.append(it->second.data(),it->second.length()); + it->second.clear(); + } + } + sink_.append(options_.line_delimiter().data(), options_.line_delimiter().length()); + } + break; case stack_item_kind::row: if (stack_[stack_.size()-2].item_kind_ == stack_item_kind::row_mapping) { @@ -357,7 +403,6 @@ class basic_csv_encoder final : public basic_json_visitor options_.line_delimiter().length()); } - //std::cout << "visit_end_array: write row column_names: " << column_names_.size() << "\n"; for (std::size_t i = 0; i < column_names_.size(); ++i) { if (i > 0) @@ -473,6 +518,7 @@ class basic_csv_encoder final : public basic_json_visitor } break; } + case stack_item_kind::flat_row: case stack_item_kind::row: { append_array_path_component(); @@ -544,6 +590,7 @@ class basic_csv_encoder final : public basic_json_visitor } break; } + case stack_item_kind::flat_row: case stack_item_kind::row: { append_array_path_component(); @@ -677,6 +724,7 @@ class basic_csv_encoder final : public basic_json_visitor } break; } + case stack_item_kind::flat_row: case stack_item_kind::row: { append_array_path_component(); @@ -755,6 +803,7 @@ class basic_csv_encoder final : public basic_json_visitor } break; } + case stack_item_kind::flat_row: case stack_item_kind::row: { append_array_path_component(); @@ -833,6 +882,7 @@ class basic_csv_encoder final : public basic_json_visitor } break; } + case stack_item_kind::flat_row: case stack_item_kind::row: { append_array_path_component(); @@ -908,6 +958,7 @@ class basic_csv_encoder final : public basic_json_visitor } break; } + case stack_item_kind::flat_row: case stack_item_kind::row: { append_array_path_component(); @@ -1096,6 +1147,7 @@ class basic_csv_encoder final : public basic_json_visitor } switch (stack_.back().item_kind_) { + case stack_item_kind::flat_row: case stack_item_kind::row: break; case stack_item_kind::column: @@ -1140,6 +1192,7 @@ class basic_csv_encoder final : public basic_json_visitor } switch(stack_.back().item_kind_) { + case stack_item_kind::flat_row: case stack_item_kind::row: { ++stack_.back().count_; diff --git a/test/csv/src/csv_encoder_tests.cpp b/test/csv/src/csv_encoder_tests.cpp index 4ca4a9bb4f..6e89cb0420 100644 --- a/test/csv/src/csv_encoder_tests.cpp +++ b/test/csv/src/csv_encoder_tests.cpp @@ -9,7 +9,6 @@ namespace csv = jsoncons::csv; TEST_CASE("test json to flat csv") { -#if 0 SECTION("array of objects to csv") { std::string expected = R"(boolean,datetime,float,text @@ -56,16 +55,15 @@ true,1948-01-01T14:57:13,1.27,Chicago Sun-Times csv::csv_string_encoder encoder(buf); j.dump(encoder); - std::cout << buf << "\n"; + //std::cout << buf << "\n"; CHECK(expected == buf); } -#endif + SECTION("array of arrays to csv") { - std::string expected = R"(/0,/1,/2,/3,/3/0,/3/0/0,/3/0/1 -Chicago Reader,1.0,1971-01-01T04:14:00,true,04:14:00,1971-01-01,40 -Chicago Sun-Times,1.27,1948-01-01T14:57:13,true,14:57:13,1948-01-01,63 + std::string expected = R"(Chicago Reader,1.0,1971-01-01T04:14:00,true +Chicago Sun-Times,1.27,1948-01-01T14:57:13,true )"; std::string jtext = R"( @@ -163,7 +161,6 @@ true,1948-01-01T14:57:13,1.27,1948-01-01,63,14:57:13,Chicago Sun-Times CHECK(expected == buf); } -#if 0 SECTION("array of arrays to csv") { std::string expected = R"(/0,/1,/2,/3,/3/0,/3/0/0,/3/0/1 @@ -214,6 +211,5 @@ Chicago Sun-Times,1.27,1948-01-01T14:57:13,true,14:57:13,1948-01-01,63 CHECK(expected == buf); } -#endif } From d3a0ab7f8c0a2550252b805b05f12e4ea088df1a Mon Sep 17 00:00:00 2001 From: Daniel Parker Date: Sun, 12 Jan 2025 18:13:31 -0500 Subject: [PATCH 16/79] csv_options init --- include/jsoncons_ext/csv/csv_options.hpp | 30 ++++++++++++++++-------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/include/jsoncons_ext/csv/csv_options.hpp b/include/jsoncons_ext/csv/csv_options.hpp index 152567beea..65a917c4d7 100644 --- a/include/jsoncons_ext/csv/csv_options.hpp +++ b/include/jsoncons_ext/csv/csv_options.hpp @@ -275,16 +275,16 @@ class basic_csv_options_common char_type quote_escape_char_{'\"'}; char_type subfield_delimiter_{char_type{}}; - bool flat_:1{true}; - bool enable_nan_to_num_:1{false}; - bool enable_inf_to_num_:1{false}; - bool enable_neginf_to_num_:1{false}; - bool enable_nan_to_str_:1{false}; - bool enable_inf_to_str_:1{false}; - bool enable_neginf_to_str_:1{false}; - bool enable_str_to_nan_:1{false}; - bool enable_str_to_inf_:1{false}; - bool enable_str_to_neginf_:1{false}; + bool flat_:1; + bool enable_nan_to_num_:1; + bool enable_inf_to_num_:1; + bool enable_neginf_to_num_:1; + bool enable_nan_to_str_:1; + bool enable_inf_to_str_:1; + bool enable_neginf_to_str_:1; + bool enable_str_to_nan_:1; + bool enable_str_to_inf_:1; + bool enable_str_to_neginf_:1; string_type nan_to_num_; string_type inf_to_num_; @@ -296,6 +296,16 @@ class basic_csv_options_common protected: basic_csv_options_common() + : flat_{true}, + enable_nan_to_num_{false}, + enable_inf_to_num_{false}, + enable_neginf_to_num_{false}, + enable_nan_to_str_{false}, + enable_inf_to_str_{false}, + enable_neginf_to_str_{false}, + enable_str_to_nan_{false}, + enable_str_to_inf_{false}, + enable_str_to_neginf_{false} { } From de73521ae0741444f9b4778e020f8abff48d57e2 Mon Sep 17 00:00:00 2001 From: Daniel Parker Date: Sun, 12 Jan 2025 21:21:30 -0500 Subject: [PATCH 17/79] csv flat mapping multivalued --- include/jsoncons_ext/csv/csv_encoder.hpp | 13 ++++++-- test/csv/src/csv_encoder_tests.cpp | 42 ++++++++++++++++++++++-- 2 files changed, 51 insertions(+), 4 deletions(-) diff --git a/include/jsoncons_ext/csv/csv_encoder.hpp b/include/jsoncons_ext/csv/csv_encoder.hpp index bab4ea0062..f430a3c348 100644 --- a/include/jsoncons_ext/csv/csv_encoder.hpp +++ b/include/jsoncons_ext/csv/csv_encoder.hpp @@ -327,11 +327,20 @@ class basic_csv_encoder final : public basic_json_visitor } case stack_item_kind::row: begin_value(sink_); - //stack_.emplace_back(stack_item_kind::row_multi_valued_field); stack_.emplace_back(stack_item_kind::row); break; case stack_item_kind::flat_row: begin_value(sink_); + if (options_.subfield_delimiter() == char_type()) + { + stack_.emplace_back(stack_item_kind::unmapped); + } + else + { + stack_.emplace_back(stack_item_kind::row_multi_valued_field); + } + break; + case stack_item_kind::row_multi_valued_field: stack_.emplace_back(stack_item_kind::unmapped); break; case stack_item_kind::unmapped: @@ -1174,7 +1183,7 @@ class basic_csv_encoder final : public basic_json_visitor } case stack_item_kind::row_multi_valued_field: case stack_item_kind::column_multi_valued_field: - if (stack_.back().count_ > 0 && options_.subfield_delimiter() != char_type()) + if (stack_.back().count_ > 0) { sink.push_back(options_.subfield_delimiter()); } diff --git a/test/csv/src/csv_encoder_tests.cpp b/test/csv/src/csv_encoder_tests.cpp index 6e89cb0420..870728caf2 100644 --- a/test/csv/src/csv_encoder_tests.cpp +++ b/test/csv/src/csv_encoder_tests.cpp @@ -9,6 +9,7 @@ namespace csv = jsoncons::csv; TEST_CASE("test json to flat csv") { +#if 0 SECTION("array of objects to csv") { std::string expected = R"(boolean,datetime,float,text @@ -106,8 +107,45 @@ Chicago Sun-Times,1.27,1948-01-01T14:57:13,true CHECK(expected == buf); } -} +#endif + SECTION("array of arrays and subarrays to csv") + { + std::string expected = R"(Chicago Reader,1.0,1971-01-01T04:14:00,true +Chicago Sun-Times,1.27,1948-01-01T14:57:13,true +)"; + + std::string jtext = R"( +[ + ["calculationPeriodCenters","paymentCenters","resetCenters"], + [ + ["NY","LON"],"TOR","LON" + ], + ["NY","LON", + ["TOR","LON"] + ], + [ + ["NY","LON"],"TOR","LON" + ], + ["NY","LON", + ["TOR","LON"] + ] +] + )"; + auto j = jsoncons::json::parse(jtext); + //std::cout << pretty_print(j) << "\n"; + + auto options = csv::csv_options{} + .subfield_delimiter(';'); + + std::string buf; + csv::csv_string_encoder encoder(buf, options); + j.dump(encoder); + + CHECK(expected == buf); + } +} +#if 0 TEST_CASE("test json to non-flat csv") { SECTION("array of objects to csv") @@ -212,4 +250,4 @@ Chicago Sun-Times,1.27,1948-01-01T14:57:13,true,14:57:13,1948-01-01,63 CHECK(expected == buf); } } - +#endif From f61f0c7507701e863e9b1a080b02993e04c73bdf Mon Sep 17 00:00:00 2001 From: Daniel Parker Date: Sun, 12 Jan 2025 22:41:01 -0500 Subject: [PATCH 18/79] csv flat mapping multivalued --- include/jsoncons_ext/csv/csv_encoder.hpp | 74 ++++++++++++++++++++---- test/csv/src/csv_encoder_tests.cpp | 14 +++-- 2 files changed, 71 insertions(+), 17 deletions(-) diff --git a/include/jsoncons_ext/csv/csv_encoder.hpp b/include/jsoncons_ext/csv/csv_encoder.hpp index f430a3c348..c31121cc27 100644 --- a/include/jsoncons_ext/csv/csv_encoder.hpp +++ b/include/jsoncons_ext/csv/csv_encoder.hpp @@ -107,6 +107,7 @@ class basic_csv_encoder final : public basic_json_visitor std::size_t column_index_; std::vector row_counts_; string_type buffer_; + string_type value_buffer_; // Noncopyable and nonmoveable basic_csv_encoder(const basic_csv_encoder&) = delete; @@ -127,7 +128,8 @@ class basic_csv_encoder final : public basic_json_visitor stack_(), fp_(options.float_format(), options.precision()), column_index_(0), - buffer_(alloc) + buffer_(alloc), + value_buffer_(alloc) { jsoncons::csv::detail::parse_column_names(options.column_names(), column_names_); } @@ -326,6 +328,7 @@ class basic_csv_encoder final : public basic_json_visitor break; } case stack_item_kind::row: + //append_array_path_component(); begin_value(sink_); stack_.emplace_back(stack_item_kind::row); break; @@ -337,6 +340,8 @@ class basic_csv_encoder final : public basic_json_visitor } else { + append_array_path_component(); + value_buffer_.clear(); stack_.emplace_back(stack_item_kind::row_multi_valued_field); } break; @@ -395,6 +400,15 @@ class basic_csv_encoder final : public basic_json_visitor sink_.append(options_.line_delimiter().data(), options_.line_delimiter().length()); } break; + case stack_item_kind::row_multi_valued_field: + { + auto it = cname_value_map_.find(stack_[stack_.size()-2].pathname_); + if (it != cname_value_map_.end()) + { + it->second.append(value_buffer_.data(),value_buffer_.length()); + } + break; + } case stack_item_kind::row: if (stack_[stack_.size()-2].item_kind_ == stack_item_kind::row_mapping) { @@ -547,8 +561,15 @@ class basic_csv_encoder final : public basic_json_visitor break; } case stack_item_kind::row_multi_valued_field: - write_null_value(sink_); + { + if (!value_buffer_.empty()) + { + value_buffer_.push_back(options_.subfield_delimiter()); + } + jsoncons::string_sink> bo(value_buffer_); + write_null_value(bo); break; + } case stack_item_kind::column: { if (column_names_.size() <= row_counts_.back()) @@ -619,8 +640,15 @@ class basic_csv_encoder final : public basic_json_visitor break; } case stack_item_kind::row_multi_valued_field: - write_string_value(sv,sink_); + { + if (!value_buffer_.empty()) + { + value_buffer_.push_back(options_.subfield_delimiter()); + } + jsoncons::string_sink> bo(value_buffer_); + write_string_value(sv, bo); break; + } case stack_item_kind::column: { if (column_names_.size() <= row_counts_.back()) @@ -753,8 +781,15 @@ class basic_csv_encoder final : public basic_json_visitor break; } case stack_item_kind::row_multi_valued_field: - write_double_value(val, context, sink_, ec); + { + if (!value_buffer_.empty()) + { + value_buffer_.push_back(options_.subfield_delimiter()); + } + jsoncons::string_sink> bo(value_buffer_); + write_double_value(val, context, bo, ec); break; + } case stack_item_kind::column: { if (column_names_.size() <= row_counts_.back()) @@ -832,8 +867,15 @@ class basic_csv_encoder final : public basic_json_visitor break; } case stack_item_kind::row_multi_valued_field: - write_int64_value(val,sink_); + { + if (!value_buffer_.empty()) + { + value_buffer_.push_back(options_.subfield_delimiter()); + } + jsoncons::string_sink> bo(value_buffer_); + write_int64_value(val, bo); break; + } case stack_item_kind::column: { if (column_names_.size() <= row_counts_.back()) @@ -911,8 +953,15 @@ class basic_csv_encoder final : public basic_json_visitor break; } case stack_item_kind::row_multi_valued_field: - write_uint64_value(val,sink_); + { + if (!value_buffer_.empty()) + { + value_buffer_.push_back(options_.subfield_delimiter()); + } + jsoncons::string_sink> bo(value_buffer_); + write_uint64_value(val, bo); break; + } case stack_item_kind::column: { if (column_names_.size() <= row_counts_.back()) @@ -987,8 +1036,15 @@ class basic_csv_encoder final : public basic_json_visitor break; } case stack_item_kind::row_multi_valued_field: - write_bool_value(val,sink_); + { + if (!value_buffer_.empty()) + { + value_buffer_.push_back(options_.subfield_delimiter()); + } + jsoncons::string_sink> bo(value_buffer_); + write_bool_value(val, bo); break; + } case stack_item_kind::column: { if (column_names_.size() <= row_counts_.back()) @@ -1183,10 +1239,6 @@ class basic_csv_encoder final : public basic_json_visitor } case stack_item_kind::row_multi_valued_field: case stack_item_kind::column_multi_valued_field: - if (stack_.back().count_ > 0) - { - sink.push_back(options_.subfield_delimiter()); - } break; default: break; diff --git a/test/csv/src/csv_encoder_tests.cpp b/test/csv/src/csv_encoder_tests.cpp index 870728caf2..92206d5005 100644 --- a/test/csv/src/csv_encoder_tests.cpp +++ b/test/csv/src/csv_encoder_tests.cpp @@ -9,7 +9,6 @@ namespace csv = jsoncons::csv; TEST_CASE("test json to flat csv") { -#if 0 SECTION("array of objects to csv") { std::string expected = R"(boolean,datetime,float,text @@ -107,11 +106,14 @@ Chicago Sun-Times,1.27,1948-01-01T14:57:13,true CHECK(expected == buf); } -#endif + SECTION("array of arrays and subarrays to csv") { - std::string expected = R"(Chicago Reader,1.0,1971-01-01T04:14:00,true -Chicago Sun-Times,1.27,1948-01-01T14:57:13,true + std::string expected = R"(calculationPeriodCenters,paymentCenters,resetCenters +NY;LON,TOR,LON +NY,LON,TOR;LON +NY;LON,TOR,LON +NY,LON,TOR;LON )"; std::string jtext = R"( @@ -145,7 +147,7 @@ Chicago Sun-Times,1.27,1948-01-01T14:57:13,true CHECK(expected == buf); } } -#if 0 + TEST_CASE("test json to non-flat csv") { SECTION("array of objects to csv") @@ -250,4 +252,4 @@ Chicago Sun-Times,1.27,1948-01-01T14:57:13,true,14:57:13,1948-01-01,63 CHECK(expected == buf); } } -#endif + From 550de3178a5deb19734677bedb31f9b93a579976 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 13 Jan 2025 09:54:35 +0000 Subject: [PATCH 19/79] Bump lukka/get-cmake from 3.31.3 to 3.31.4 Bumps [lukka/get-cmake](https://github.com/lukka/get-cmake) from 3.31.3 to 3.31.4. - [Release notes](https://github.com/lukka/get-cmake/releases) - [Commits](https://github.com/lukka/get-cmake/compare/v3.31.3...v3.31.4) --- updated-dependencies: - dependency-name: lukka/get-cmake dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- .github/workflows/ubuntu.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/ubuntu.yml b/.github/workflows/ubuntu.yml index 2288cdfd24..abe283dccc 100644 --- a/.github/workflows/ubuntu.yml +++ b/.github/workflows/ubuntu.yml @@ -170,7 +170,7 @@ jobs: steps: - uses: actions/checkout@v4 - name: Get latest CMake and ninja - uses: lukka/get-cmake@v3.31.3 + uses: lukka/get-cmake@v3.31.4 - name: cmake run: cmake -S . -B build -DCMAKE_BUILD_TYPE=Debug -DJSONCONS_BUILD_TESTS=On - name: build @@ -189,7 +189,7 @@ jobs: steps: - uses: actions/checkout@v4 - name: Get latest CMake and ninja - uses: lukka/get-cmake@v3.31.3 + uses: lukka/get-cmake@v3.31.4 - name: cmake run: cmake -S . -B build -DJSONCONS_SANITIZE=ON -DCMAKE_BUILD_TYPE=Debug -DJSONCONS_BUILD_TESTS=On - name: build @@ -210,7 +210,7 @@ jobs: run: apt-get update ; apt-get install -y unzip git - uses: actions/checkout@v4 - name: Get latest CMake and ninja - uses: lukka/get-cmake@v3.31.3 + uses: lukka/get-cmake@v3.31.4 - name: cmake run: cmake -S . -B build -DCMAKE_BUILD_TYPE=Release -DJSONCONS_BUILD_TESTS=On - name: build @@ -231,7 +231,7 @@ jobs: run: apt-get update ; apt-get install -y unzip git - uses: actions/checkout@v4 - name: Get latest CMake and ninja - uses: lukka/get-cmake@v3.31.3 + uses: lukka/get-cmake@v3.31.4 - name: cmake run: cmake -S . -B build -DJSONCONS_SANITIZE=ON -DCMAKE_BUILD_TYPE=Debug -DJSONCONS_BUILD_TESTS=On - name: build From 279f9c297da38e3bab9a4f6c5621aec61ce82043 Mon Sep 17 00:00:00 2001 From: c8ef Date: Mon, 13 Jan 2025 20:59:25 +0800 Subject: [PATCH 20/79] remove duplicate header guard(NFC) --- include/jsoncons/detail/grisu3.hpp | 1 - 1 file changed, 1 deletion(-) diff --git a/include/jsoncons/detail/grisu3.hpp b/include/jsoncons/detail/grisu3.hpp index 99ff0b3392..873d2cbc29 100644 --- a/include/jsoncons/detail/grisu3.hpp +++ b/include/jsoncons/detail/grisu3.hpp @@ -34,7 +34,6 @@ minor modifications. #ifndef JSONCONS_DETAIL_GRISU3_HPP #define JSONCONS_DETAIL_GRISU3_HPP -#pragma once #include #include #include From 8181d92156e0a2d3b0d6e5f00fe523ccce83d3f6 Mon Sep 17 00:00:00 2001 From: Daniel Parker Date: Mon, 13 Jan 2025 11:06:48 -0500 Subject: [PATCH 21/79] csv flat mapping multivalued --- include/jsoncons_ext/csv/csv_encoder.hpp | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/include/jsoncons_ext/csv/csv_encoder.hpp b/include/jsoncons_ext/csv/csv_encoder.hpp index c31121cc27..ceaf2f3137 100644 --- a/include/jsoncons_ext/csv/csv_encoder.hpp +++ b/include/jsoncons_ext/csv/csv_encoder.hpp @@ -208,8 +208,18 @@ class basic_csv_encoder final : public basic_json_visitor case stack_item_kind::object: stack_.emplace_back(stack_item_kind::object); return true; - case stack_item_kind::unmapped: case stack_item_kind::flat_object: + if (options_.subfield_delimiter() == char_type()) + { + stack_.emplace_back(stack_item_kind::unmapped); + } + else + { + value_buffer_.clear(); + stack_.emplace_back(stack_item_kind::row_multi_valued_field); + } + return true; + case stack_item_kind::unmapped: stack_.emplace_back(stack_item_kind::unmapped); return true; default: // error @@ -328,12 +338,9 @@ class basic_csv_encoder final : public basic_json_visitor break; } case stack_item_kind::row: - //append_array_path_component(); - begin_value(sink_); stack_.emplace_back(stack_item_kind::row); break; case stack_item_kind::flat_row: - begin_value(sink_); if (options_.subfield_delimiter() == char_type()) { stack_.emplace_back(stack_item_kind::unmapped); From 47028315dd2fe36ec34894b15ed1cd4d663236ad Mon Sep 17 00:00:00 2001 From: Daniel Parker Date: Mon, 13 Jan 2025 11:08:53 -0500 Subject: [PATCH 22/79] csv flat mapping multivalued --- include/jsoncons_ext/csv/csv_encoder.hpp | 58 ++++++++++++------------ 1 file changed, 29 insertions(+), 29 deletions(-) diff --git a/include/jsoncons_ext/csv/csv_encoder.hpp b/include/jsoncons_ext/csv/csv_encoder.hpp index ceaf2f3137..2a21d98689 100644 --- a/include/jsoncons_ext/csv/csv_encoder.hpp +++ b/include/jsoncons_ext/csv/csv_encoder.hpp @@ -67,9 +67,9 @@ class basic_csv_encoder final : public basic_json_visitor object, row, column, - object_multi_valued_field, - row_multi_valued_field, - column_multi_valued_field + object_multivalued_field, + row_multivalued_field, + column_multivalued_field }; struct stack_item @@ -216,7 +216,7 @@ class basic_csv_encoder final : public basic_json_visitor else { value_buffer_.clear(); - stack_.emplace_back(stack_item_kind::row_multi_valued_field); + stack_.emplace_back(stack_item_kind::row_multivalued_field); } return true; case stack_item_kind::unmapped: @@ -316,7 +316,7 @@ class basic_csv_encoder final : public basic_json_visitor stack_.emplace_back(stack_item_kind::row); break; case stack_item_kind::object: - stack_.emplace_back(stack_item_kind::object_multi_valued_field); + stack_.emplace_back(stack_item_kind::object_multivalued_field); break; case stack_item_kind::column_mapping: stack_.emplace_back(stack_item_kind::column); @@ -334,7 +334,7 @@ class basic_csv_encoder final : public basic_json_visitor } jsoncons::string_sink> bo(column_names_[row_counts_.back()]); begin_value(bo); - stack_.emplace_back(stack_item_kind::column_multi_valued_field); + stack_.emplace_back(stack_item_kind::column_multivalued_field); break; } case stack_item_kind::row: @@ -349,10 +349,10 @@ class basic_csv_encoder final : public basic_json_visitor { append_array_path_component(); value_buffer_.clear(); - stack_.emplace_back(stack_item_kind::row_multi_valued_field); + stack_.emplace_back(stack_item_kind::row_multivalued_field); } break; - case stack_item_kind::row_multi_valued_field: + case stack_item_kind::row_multivalued_field: stack_.emplace_back(stack_item_kind::unmapped); break; case stack_item_kind::unmapped: @@ -407,7 +407,7 @@ class basic_csv_encoder final : public basic_json_visitor sink_.append(options_.line_delimiter().data(), options_.line_delimiter().length()); } break; - case stack_item_kind::row_multi_valued_field: + case stack_item_kind::row_multivalued_field: { auto it = cname_value_map_.find(stack_[stack_.size()-2].pathname_); if (it != cname_value_map_.end()) @@ -523,7 +523,7 @@ class basic_csv_encoder final : public basic_json_visitor { case stack_item_kind::flat_object: case stack_item_kind::object: - case stack_item_kind::object_multi_valued_field: + case stack_item_kind::object_multivalued_field: { if (stack_[0].count_ == 0) { @@ -567,7 +567,7 @@ class basic_csv_encoder final : public basic_json_visitor } break; } - case stack_item_kind::row_multi_valued_field: + case stack_item_kind::row_multivalued_field: { if (!value_buffer_.empty()) { @@ -587,7 +587,7 @@ class basic_csv_encoder final : public basic_json_visitor write_null_value(bo); break; } - case stack_item_kind::column_multi_valued_field: + case stack_item_kind::column_multivalued_field: { jsoncons::string_sink> bo(column_names_[row_counts_.back()]); write_null_value(bo); @@ -606,7 +606,7 @@ class basic_csv_encoder final : public basic_json_visitor { case stack_item_kind::flat_object: case stack_item_kind::object: - case stack_item_kind::object_multi_valued_field: + case stack_item_kind::object_multivalued_field: { if (stack_[0].count_ == 0) { @@ -646,7 +646,7 @@ class basic_csv_encoder final : public basic_json_visitor } break; } - case stack_item_kind::row_multi_valued_field: + case stack_item_kind::row_multivalued_field: { if (!value_buffer_.empty()) { @@ -666,7 +666,7 @@ class basic_csv_encoder final : public basic_json_visitor write_string_value(sv,bo); break; } - case stack_item_kind::column_multi_valued_field: + case stack_item_kind::column_multivalued_field: { jsoncons::string_sink> bo(column_names_[row_counts_.back()]); write_string_value(sv,bo); @@ -743,7 +743,7 @@ class basic_csv_encoder final : public basic_json_visitor { case stack_item_kind::flat_object: case stack_item_kind::object: - case stack_item_kind::object_multi_valued_field: + case stack_item_kind::object_multivalued_field: { if (stack_[0].count_ == 0) { @@ -787,7 +787,7 @@ class basic_csv_encoder final : public basic_json_visitor } break; } - case stack_item_kind::row_multi_valued_field: + case stack_item_kind::row_multivalued_field: { if (!value_buffer_.empty()) { @@ -807,7 +807,7 @@ class basic_csv_encoder final : public basic_json_visitor write_double_value(val, context, bo, ec); break; } - case stack_item_kind::column_multi_valued_field: + case stack_item_kind::column_multivalued_field: { jsoncons::string_sink> bo(column_names_[row_counts_.back()]); write_double_value(val, context, bo, ec); @@ -829,7 +829,7 @@ class basic_csv_encoder final : public basic_json_visitor { case stack_item_kind::flat_object: case stack_item_kind::object: - case stack_item_kind::object_multi_valued_field: + case stack_item_kind::object_multivalued_field: { if (stack_[0].count_ == 0) { @@ -873,7 +873,7 @@ class basic_csv_encoder final : public basic_json_visitor } break; } - case stack_item_kind::row_multi_valued_field: + case stack_item_kind::row_multivalued_field: { if (!value_buffer_.empty()) { @@ -893,7 +893,7 @@ class basic_csv_encoder final : public basic_json_visitor write_int64_value(val, bo); break; } - case stack_item_kind::column_multi_valued_field: + case stack_item_kind::column_multivalued_field: { jsoncons::string_sink> bo(column_names_[row_counts_.back()]); write_int64_value(val, bo); @@ -915,7 +915,7 @@ class basic_csv_encoder final : public basic_json_visitor { case stack_item_kind::flat_object: case stack_item_kind::object: - case stack_item_kind::object_multi_valued_field: + case stack_item_kind::object_multivalued_field: { if (stack_[0].count_ == 0) { @@ -959,7 +959,7 @@ class basic_csv_encoder final : public basic_json_visitor } break; } - case stack_item_kind::row_multi_valued_field: + case stack_item_kind::row_multivalued_field: { if (!value_buffer_.empty()) { @@ -979,7 +979,7 @@ class basic_csv_encoder final : public basic_json_visitor write_uint64_value(val, bo); break; } - case stack_item_kind::column_multi_valued_field: + case stack_item_kind::column_multivalued_field: { jsoncons::string_sink> bo(column_names_[row_counts_.back()]); write_uint64_value(val, bo); @@ -998,7 +998,7 @@ class basic_csv_encoder final : public basic_json_visitor { case stack_item_kind::flat_object: case stack_item_kind::object: - case stack_item_kind::object_multi_valued_field: + case stack_item_kind::object_multivalued_field: { if (stack_[0].count_ == 0) { @@ -1042,7 +1042,7 @@ class basic_csv_encoder final : public basic_json_visitor } break; } - case stack_item_kind::row_multi_valued_field: + case stack_item_kind::row_multivalued_field: { if (!value_buffer_.empty()) { @@ -1062,7 +1062,7 @@ class basic_csv_encoder final : public basic_json_visitor write_bool_value(val, bo); break; } - case stack_item_kind::column_multi_valued_field: + case stack_item_kind::column_multivalued_field: { jsoncons::string_sink> bo(column_names_[row_counts_.back()]); write_bool_value(val, bo); @@ -1244,8 +1244,8 @@ class basic_csv_encoder final : public basic_json_visitor } break; } - case stack_item_kind::row_multi_valued_field: - case stack_item_kind::column_multi_valued_field: + case stack_item_kind::row_multivalued_field: + case stack_item_kind::column_multivalued_field: break; default: break; From 71388b57b3bc9e343692ee7b4a02024b255ec401 Mon Sep 17 00:00:00 2001 From: Daniel Parker Date: Mon, 13 Jan 2025 18:00:11 -0500 Subject: [PATCH 23/79] csv flat mapping multivalued --- include/jsoncons_ext/csv/csv_encoder.hpp | 76 ++++++++++++++++-------- test/csv/src/csv_encoder_tests.cpp | 43 ++++++++++++++ 2 files changed, 95 insertions(+), 24 deletions(-) diff --git a/include/jsoncons_ext/csv/csv_encoder.hpp b/include/jsoncons_ext/csv/csv_encoder.hpp index 2a21d98689..fb8a718da4 100644 --- a/include/jsoncons_ext/csv/csv_encoder.hpp +++ b/include/jsoncons_ext/csv/csv_encoder.hpp @@ -68,7 +68,7 @@ class basic_csv_encoder final : public basic_json_visitor row, column, object_multivalued_field, - row_multivalued_field, + multivalued_field, column_multivalued_field }; @@ -190,7 +190,7 @@ class basic_csv_encoder final : public basic_json_visitor sink_.flush(); } - bool visit_begin_object(semantic_tag, const ser_context&, std::error_code& /*ec*/) override + bool visit_begin_object(semantic_tag, const ser_context&, std::error_code& ec) override { if (stack_.empty()) { @@ -201,13 +201,13 @@ class basic_csv_encoder final : public basic_json_visitor { case stack_item_kind::flat_row_mapping: stack_.emplace_back(stack_item_kind::flat_object); - return true; + break; case stack_item_kind::row_mapping: stack_.emplace_back(stack_item_kind::object); return true; case stack_item_kind::object: stack_.emplace_back(stack_item_kind::object); - return true; + break; case stack_item_kind::flat_object: if (options_.subfield_delimiter() == char_type()) { @@ -215,21 +215,23 @@ class basic_csv_encoder final : public basic_json_visitor } else { + stack_.back().pathname_ = stack_[stack_.size()-2].pathname_; + std::cout << "begin_object pathname: " << stack_.back().pathname_ << "\n"; value_buffer_.clear(); - stack_.emplace_back(stack_item_kind::row_multivalued_field); + stack_.emplace_back(stack_item_kind::multivalued_field); } - return true; + break; case stack_item_kind::unmapped: stack_.emplace_back(stack_item_kind::unmapped); - return true; + break; default: // error - //ec = csv_errc::source_error; - //return false; - return true; + ec = csv_errc::source_error; + return false; } + return true; } - bool visit_end_object(const ser_context&, std::error_code&) override + bool visit_end_object(const ser_context&, std::error_code& ec) override { if (stack_.empty()) { @@ -246,6 +248,7 @@ class basic_csv_encoder final : public basic_json_visitor { for (std::size_t i = 0; i < column_names_.size(); ++i) { + std::cout << "column-" << i << " name: " << column_names_[i] << "\n"; if (i > 0) { sink_.push_back(options_.field_delimiter()); @@ -281,8 +284,11 @@ class basic_csv_encoder final : public basic_json_visitor } break; } - default: + case stack_item_kind::unmapped: break; + default: + ec = csv_errc::source_error; + return false; } stack_.pop_back(); if (!stack_.empty()) @@ -349,10 +355,26 @@ class basic_csv_encoder final : public basic_json_visitor { append_array_path_component(); value_buffer_.clear(); - stack_.emplace_back(stack_item_kind::row_multivalued_field); + stack_.emplace_back(stack_item_kind::multivalued_field); } break; - case stack_item_kind::row_multivalued_field: + case stack_item_kind::flat_object: + if (options_.subfield_delimiter() == char_type()) + { + stack_.emplace_back(stack_item_kind::unmapped); + } + else + { + std::cout << "flat_object->row " << stack_.back().pathname_ << "\n"; + if (options_.column_names().empty()) + { + column_names_.emplace_back(stack_.back().pathname_); + } + value_buffer_.clear(); + stack_.emplace_back(stack_item_kind::multivalued_field); + } + break; + case stack_item_kind::multivalued_field: stack_.emplace_back(stack_item_kind::unmapped); break; case stack_item_kind::unmapped: @@ -365,7 +387,7 @@ class basic_csv_encoder final : public basic_json_visitor return true; } - bool visit_end_array(const ser_context&, std::error_code&) override + bool visit_end_array(const ser_context&, std::error_code& ec) override { if (stack_.empty()) { @@ -374,6 +396,9 @@ class basic_csv_encoder final : public basic_json_visitor switch (stack_.back().item_kind_) { + case stack_item_kind::row_mapping: + case stack_item_kind::flat_row_mapping: + break; case stack_item_kind::flat_row: if (stack_[stack_.size()-2].item_kind_ == stack_item_kind::flat_row_mapping) { @@ -407,7 +432,7 @@ class basic_csv_encoder final : public basic_json_visitor sink_.append(options_.line_delimiter().data(), options_.line_delimiter().length()); } break; - case stack_item_kind::row_multivalued_field: + case stack_item_kind::multivalued_field: { auto it = cname_value_map_.find(stack_[stack_.size()-2].pathname_); if (it != cname_value_map_.end()) @@ -452,8 +477,11 @@ class basic_csv_encoder final : public basic_json_visitor case stack_item_kind::column: ++column_index_; break; - default: + case stack_item_kind::unmapped: break; + default: + ec = csv_errc::source_error; + return false; } stack_.pop_back(); @@ -567,7 +595,7 @@ class basic_csv_encoder final : public basic_json_visitor } break; } - case stack_item_kind::row_multivalued_field: + case stack_item_kind::multivalued_field: { if (!value_buffer_.empty()) { @@ -646,7 +674,7 @@ class basic_csv_encoder final : public basic_json_visitor } break; } - case stack_item_kind::row_multivalued_field: + case stack_item_kind::multivalued_field: { if (!value_buffer_.empty()) { @@ -787,7 +815,7 @@ class basic_csv_encoder final : public basic_json_visitor } break; } - case stack_item_kind::row_multivalued_field: + case stack_item_kind::multivalued_field: { if (!value_buffer_.empty()) { @@ -873,7 +901,7 @@ class basic_csv_encoder final : public basic_json_visitor } break; } - case stack_item_kind::row_multivalued_field: + case stack_item_kind::multivalued_field: { if (!value_buffer_.empty()) { @@ -959,7 +987,7 @@ class basic_csv_encoder final : public basic_json_visitor } break; } - case stack_item_kind::row_multivalued_field: + case stack_item_kind::multivalued_field: { if (!value_buffer_.empty()) { @@ -1042,7 +1070,7 @@ class basic_csv_encoder final : public basic_json_visitor } break; } - case stack_item_kind::row_multivalued_field: + case stack_item_kind::multivalued_field: { if (!value_buffer_.empty()) { @@ -1244,7 +1272,7 @@ class basic_csv_encoder final : public basic_json_visitor } break; } - case stack_item_kind::row_multivalued_field: + case stack_item_kind::multivalued_field: case stack_item_kind::column_multivalued_field: break; default: diff --git a/test/csv/src/csv_encoder_tests.cpp b/test/csv/src/csv_encoder_tests.cpp index 92206d5005..35dcbec66e 100644 --- a/test/csv/src/csv_encoder_tests.cpp +++ b/test/csv/src/csv_encoder_tests.cpp @@ -7,6 +7,7 @@ namespace csv = jsoncons::csv; +#if 0 TEST_CASE("test json to flat csv") { SECTION("array of objects to csv") @@ -147,9 +148,11 @@ NY,LON,TOR;LON CHECK(expected == buf); } } +#endif TEST_CASE("test json to non-flat csv") { +#if 0 SECTION("array of objects to csv") { std::string expected = R"(/boolean,/datetime,/float,/nested/nested/date,/nested/nested/integer,/nested/time,/text @@ -251,5 +254,45 @@ Chicago Sun-Times,1.27,1948-01-01T14:57:13,true,14:57:13,1948-01-01,63 CHECK(expected == buf); } +#endif + SECTION("array of object and subarrays to csv") + { + std::string expected = R"(calculationPeriodCenters,paymentCenters,resetCenters +NY;LON,TOR,LON +NY,LON,TOR;LON +NY;LON,TOR,LON +NY,LON,TOR;LON +)"; + + std::string jtext = R"( +[ + {"calculationPeriodCenters" : ["NY","LON"], + "paymentCenters" : "TOR", + "resetCenters" : "LON"}, + {"calculationPeriodCenters" : "NY", + "paymentCenters" : "LON", + "resetCenters" : ["TOR","LON"]}, + {"calculationPeriodCenters" : ["NY","LON"], + "paymentCenters" : "TOR", + "resetCenters" : "LON"}, + {"calculationPeriodCenters" : "NY", + "paymentCenters" : "LON", + "resetCenters" : ["TOR","LON"]} +] + )"; + + auto j = jsoncons::json::parse(jtext); + //std::cout << pretty_print(j) << "\n"; + + auto options = csv::csv_options{} + .flat(true) + .subfield_delimiter(';'); + + std::string buf; + csv::csv_string_encoder encoder(buf, options); + j.dump(encoder); + + CHECK(expected == buf); + } } From 1395d12a7ec43b93eb1c34bde790b8b74ab8149e Mon Sep 17 00:00:00 2001 From: Daniel Parker Date: Mon, 13 Jan 2025 19:47:21 -0500 Subject: [PATCH 24/79] csv flat mapping multivalued --- include/jsoncons_ext/csv/csv_encoder.hpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/include/jsoncons_ext/csv/csv_encoder.hpp b/include/jsoncons_ext/csv/csv_encoder.hpp index fb8a718da4..d6c0b2ec94 100644 --- a/include/jsoncons_ext/csv/csv_encoder.hpp +++ b/include/jsoncons_ext/csv/csv_encoder.hpp @@ -216,7 +216,6 @@ class basic_csv_encoder final : public basic_json_visitor else { stack_.back().pathname_ = stack_[stack_.size()-2].pathname_; - std::cout << "begin_object pathname: " << stack_.back().pathname_ << "\n"; value_buffer_.clear(); stack_.emplace_back(stack_item_kind::multivalued_field); } @@ -248,7 +247,6 @@ class basic_csv_encoder final : public basic_json_visitor { for (std::size_t i = 0; i < column_names_.size(); ++i) { - std::cout << "column-" << i << " name: " << column_names_[i] << "\n"; if (i > 0) { sink_.push_back(options_.field_delimiter()); @@ -365,11 +363,11 @@ class basic_csv_encoder final : public basic_json_visitor } else { - std::cout << "flat_object->row " << stack_.back().pathname_ << "\n"; if (options_.column_names().empty()) { column_names_.emplace_back(stack_.back().pathname_); } + cname_value_map_[stack_.back().pathname_] = std::basic_string(); value_buffer_.clear(); stack_.emplace_back(stack_item_kind::multivalued_field); } @@ -636,6 +634,7 @@ class basic_csv_encoder final : public basic_json_visitor case stack_item_kind::object: case stack_item_kind::object_multivalued_field: { + //stack_.back().pathname_ = stack_[stack_.size()-2].pathname_; if (stack_[0].count_ == 0) { if (options_.column_names().empty()) From 6c3c2dcbda8c2c22a606bf2190079d9ed60f15d3 Mon Sep 17 00:00:00 2001 From: Daniel Parker Date: Mon, 13 Jan 2025 20:00:18 -0500 Subject: [PATCH 25/79] csv flat mapping multivalued --- include/jsoncons_ext/csv/csv_encoder.hpp | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/include/jsoncons_ext/csv/csv_encoder.hpp b/include/jsoncons_ext/csv/csv_encoder.hpp index d6c0b2ec94..6c918d5327 100644 --- a/include/jsoncons_ext/csv/csv_encoder.hpp +++ b/include/jsoncons_ext/csv/csv_encoder.hpp @@ -257,6 +257,7 @@ class basic_csv_encoder final : public basic_json_visitor sink_.append(options_.line_delimiter().data(), options_.line_delimiter().length()); } + std::cout << "num column_names_: " << column_names_.size() << "\n"; for (std::size_t i = 0; i < column_names_.size(); ++i) { if (i > 0) @@ -363,11 +364,14 @@ class basic_csv_encoder final : public basic_json_visitor } else { - if (options_.column_names().empty()) + if (stack_[0].count_ == 0) { - column_names_.emplace_back(stack_.back().pathname_); + if (options_.column_names().empty()) + { + column_names_.emplace_back(stack_.back().pathname_); + } + cname_value_map_[stack_.back().pathname_] = std::basic_string(); } - cname_value_map_[stack_.back().pathname_] = std::basic_string(); value_buffer_.clear(); stack_.emplace_back(stack_item_kind::multivalued_field); } @@ -640,6 +644,7 @@ class basic_csv_encoder final : public basic_json_visitor if (options_.column_names().empty()) { column_names_.emplace_back(stack_.back().pathname_); + std::cout << "Number of columns: " << column_names_.size() << "\n"; } cname_value_map_[stack_.back().pathname_] = std::basic_string(); } From f6fb67aec5584ce91d7044a8b5a5ef21389d960f Mon Sep 17 00:00:00 2001 From: Daniel Parker Date: Mon, 13 Jan 2025 20:15:50 -0500 Subject: [PATCH 26/79] csv flat mapping multivalued --- include/jsoncons_ext/csv/csv_encoder.hpp | 2 -- test/csv/src/csv_encoder_tests.cpp | 9 +++++---- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/include/jsoncons_ext/csv/csv_encoder.hpp b/include/jsoncons_ext/csv/csv_encoder.hpp index 6c918d5327..eb69e155ab 100644 --- a/include/jsoncons_ext/csv/csv_encoder.hpp +++ b/include/jsoncons_ext/csv/csv_encoder.hpp @@ -257,7 +257,6 @@ class basic_csv_encoder final : public basic_json_visitor sink_.append(options_.line_delimiter().data(), options_.line_delimiter().length()); } - std::cout << "num column_names_: " << column_names_.size() << "\n"; for (std::size_t i = 0; i < column_names_.size(); ++i) { if (i > 0) @@ -644,7 +643,6 @@ class basic_csv_encoder final : public basic_json_visitor if (options_.column_names().empty()) { column_names_.emplace_back(stack_.back().pathname_); - std::cout << "Number of columns: " << column_names_.size() << "\n"; } cname_value_map_[stack_.back().pathname_] = std::basic_string(); } diff --git a/test/csv/src/csv_encoder_tests.cpp b/test/csv/src/csv_encoder_tests.cpp index 35dcbec66e..f97768f8ae 100644 --- a/test/csv/src/csv_encoder_tests.cpp +++ b/test/csv/src/csv_encoder_tests.cpp @@ -7,7 +7,7 @@ namespace csv = jsoncons::csv; -#if 0 +//#if 0 TEST_CASE("test json to flat csv") { SECTION("array of objects to csv") @@ -148,11 +148,11 @@ NY,LON,TOR;LON CHECK(expected == buf); } } -#endif +//#endif TEST_CASE("test json to non-flat csv") { -#if 0 +//#if 0 SECTION("array of objects to csv") { std::string expected = R"(/boolean,/datetime,/float,/nested/nested/date,/nested/nested/integer,/nested/time,/text @@ -254,7 +254,7 @@ Chicago Sun-Times,1.27,1948-01-01T14:57:13,true,14:57:13,1948-01-01,63 CHECK(expected == buf); } -#endif + SECTION("array of object and subarrays to csv") { std::string expected = R"(calculationPeriodCenters,paymentCenters,resetCenters @@ -294,5 +294,6 @@ NY,LON,TOR;LON CHECK(expected == buf); } +//#endif } From 2294e87214cca0e58880b5e39baf03f64ed3b5b8 Mon Sep 17 00:00:00 2001 From: Daniel Parker Date: Mon, 13 Jan 2025 20:52:54 -0500 Subject: [PATCH 27/79] csv object of arrays --- test/csv/src/csv_encoder_tests.cpp | 39 +++++++++++++++++++++++++++--- 1 file changed, 35 insertions(+), 4 deletions(-) diff --git a/test/csv/src/csv_encoder_tests.cpp b/test/csv/src/csv_encoder_tests.cpp index f97768f8ae..4be072dd27 100644 --- a/test/csv/src/csv_encoder_tests.cpp +++ b/test/csv/src/csv_encoder_tests.cpp @@ -7,9 +7,9 @@ namespace csv = jsoncons::csv; -//#if 0 TEST_CASE("test json to flat csv") { +#if 0 SECTION("array of objects to csv") { std::string expected = R"(boolean,datetime,float,text @@ -147,12 +147,43 @@ NY,LON,TOR;LON CHECK(expected == buf); } +#endif + + SECTION("object of arrays and subarrays to csv") + { + std::string expected = R"(calculationPeriodCenters,paymentCenters,resetCenters +NY;LON,TOR,LON +NY,LON,TOR;LON +NY;LON,TOR,LON +NY,LON,TOR;LON +)"; + + const std::string jtext = R"( +{ + "a" : [[1,true,null],[-4,5.5,"6"]], + "b" : [[7,8,9],[10,11,12]], + "c" : [15,16,17] +} + )"; + + auto j = jsoncons::json::parse(jtext); + //std::cout << pretty_print(j) << "\n"; + + auto options = csv::csv_options{} + .subfield_delimiter(';'); + + std::string buf; + csv::csv_string_encoder encoder(buf, options); + j.dump(encoder); + + std::cout << buf << "\n"; + //CHECK(expected == buf); + } } -//#endif TEST_CASE("test json to non-flat csv") { -//#if 0 +#if 0 SECTION("array of objects to csv") { std::string expected = R"(/boolean,/datetime,/float,/nested/nested/date,/nested/nested/integer,/nested/time,/text @@ -294,6 +325,6 @@ NY,LON,TOR;LON CHECK(expected == buf); } -//#endif +#endif } From 81102c95f5f59814f2b1ec525906ef5cbe3e9f96 Mon Sep 17 00:00:00 2001 From: c8ef Date: Tue, 14 Jan 2025 22:06:51 +0800 Subject: [PATCH 28/79] fix various header issue(NFC) --- include/jsoncons/utility/heap_string.hpp | 2 ++ include/jsoncons_ext/csv/csv_encoder.hpp | 1 + include/jsoncons_ext/ubjson/ubjson_parser.hpp | 1 + 3 files changed, 4 insertions(+) diff --git a/include/jsoncons/utility/heap_string.hpp b/include/jsoncons/utility/heap_string.hpp index 06aedb8e1a..3fc129e98e 100644 --- a/include/jsoncons/utility/heap_string.hpp +++ b/include/jsoncons/utility/heap_string.hpp @@ -12,6 +12,8 @@ #include // std::allocator #include +#include +#include namespace jsoncons { namespace utility { diff --git a/include/jsoncons_ext/csv/csv_encoder.hpp b/include/jsoncons_ext/csv/csv_encoder.hpp index 45c13a4dc0..cf8b5bce76 100644 --- a/include/jsoncons_ext/csv/csv_encoder.hpp +++ b/include/jsoncons_ext/csv/csv_encoder.hpp @@ -21,6 +21,7 @@ #include #include #include +#include #include namespace jsoncons { namespace csv { diff --git a/include/jsoncons_ext/ubjson/ubjson_parser.hpp b/include/jsoncons_ext/ubjson/ubjson_parser.hpp index 49119980f0..2954d1e6c2 100644 --- a/include/jsoncons_ext/ubjson/ubjson_parser.hpp +++ b/include/jsoncons_ext/ubjson/ubjson_parser.hpp @@ -16,6 +16,7 @@ #include // std::move #include +#include #include #include #include From f142500d2b82e938e5b9e8fa6c121e1d7b84a0bd Mon Sep 17 00:00:00 2001 From: Daniel Parker Date: Wed, 15 Jan 2025 12:01:48 -0500 Subject: [PATCH 29/79] csv flat mapping columns --- include/jsoncons_ext/csv/csv_encoder.hpp | 26 +++-- test/csv/src/csv_encoder_tests.cpp | 121 ++++++++++++++++++++--- 2 files changed, 127 insertions(+), 20 deletions(-) diff --git a/include/jsoncons_ext/csv/csv_encoder.hpp b/include/jsoncons_ext/csv/csv_encoder.hpp index eb69e155ab..93bf5348bf 100644 --- a/include/jsoncons_ext/csv/csv_encoder.hpp +++ b/include/jsoncons_ext/csv/csv_encoder.hpp @@ -67,7 +67,6 @@ class basic_csv_encoder final : public basic_json_visitor object, row, column, - object_multivalued_field, multivalued_field, column_multivalued_field }; @@ -220,10 +219,13 @@ class basic_csv_encoder final : public basic_json_visitor stack_.emplace_back(stack_item_kind::multivalued_field); } break; + case stack_item_kind::column_multivalued_field: + break; case stack_item_kind::unmapped: stack_.emplace_back(stack_item_kind::unmapped); break; default: // error + std::cout << "visit_begin_object " << (int)stack_.back().item_kind_ << "\n"; ec = csv_errc::source_error; return false; } @@ -282,9 +284,12 @@ class basic_csv_encoder final : public basic_json_visitor } break; } + case stack_item_kind::column_multivalued_field: + break; case stack_item_kind::unmapped: break; default: + std::cout << "visit_end_object " << (int)stack_.back().item_kind_ << "\n"; ec = csv_errc::source_error; return false; } @@ -320,7 +325,7 @@ class basic_csv_encoder final : public basic_json_visitor stack_.emplace_back(stack_item_kind::row); break; case stack_item_kind::object: - stack_.emplace_back(stack_item_kind::object_multivalued_field); + stack_.emplace_back(stack_item_kind::object); break; case stack_item_kind::column_mapping: stack_.emplace_back(stack_item_kind::column); @@ -378,10 +383,13 @@ class basic_csv_encoder final : public basic_json_visitor case stack_item_kind::multivalued_field: stack_.emplace_back(stack_item_kind::unmapped); break; + case stack_item_kind::column_multivalued_field: + break; case stack_item_kind::unmapped: stack_.emplace_back(stack_item_kind::unmapped); break; default: // error + std::cout << "visit_begin_array " << (int)stack_.back().item_kind_ << "\n"; ec = csv_errc::source_error; return false; } @@ -478,9 +486,12 @@ class basic_csv_encoder final : public basic_json_visitor case stack_item_kind::column: ++column_index_; break; + case stack_item_kind::column_multivalued_field: + break; case stack_item_kind::unmapped: break; default: + std::cout << "visit_end_array " << (int)stack_.back().item_kind_ << "\n"; ec = csv_errc::source_error; return false; } @@ -552,7 +563,6 @@ class basic_csv_encoder final : public basic_json_visitor { case stack_item_kind::flat_object: case stack_item_kind::object: - case stack_item_kind::object_multivalued_field: { if (stack_[0].count_ == 0) { @@ -635,7 +645,6 @@ class basic_csv_encoder final : public basic_json_visitor { case stack_item_kind::flat_object: case stack_item_kind::object: - case stack_item_kind::object_multivalued_field: { //stack_.back().pathname_ = stack_[stack_.size()-2].pathname_; if (stack_[0].count_ == 0) @@ -773,7 +782,6 @@ class basic_csv_encoder final : public basic_json_visitor { case stack_item_kind::flat_object: case stack_item_kind::object: - case stack_item_kind::object_multivalued_field: { if (stack_[0].count_ == 0) { @@ -859,7 +867,6 @@ class basic_csv_encoder final : public basic_json_visitor { case stack_item_kind::flat_object: case stack_item_kind::object: - case stack_item_kind::object_multivalued_field: { if (stack_[0].count_ == 0) { @@ -945,7 +952,6 @@ class basic_csv_encoder final : public basic_json_visitor { case stack_item_kind::flat_object: case stack_item_kind::object: - case stack_item_kind::object_multivalued_field: { if (stack_[0].count_ == 0) { @@ -1028,7 +1034,6 @@ class basic_csv_encoder final : public basic_json_visitor { case stack_item_kind::flat_object: case stack_item_kind::object: - case stack_item_kind::object_multivalued_field: { if (stack_[0].count_ == 0) { @@ -1275,7 +1280,12 @@ class basic_csv_encoder final : public basic_json_visitor break; } case stack_item_kind::multivalued_field: + break; case stack_item_kind::column_multivalued_field: + if (stack_.back().count_ > 0 && options_.subfield_delimiter() != char_type()) + { + sink.push_back(options_.subfield_delimiter()); + } break; default: break; diff --git a/test/csv/src/csv_encoder_tests.cpp b/test/csv/src/csv_encoder_tests.cpp index 4be072dd27..2168c7e045 100644 --- a/test/csv/src/csv_encoder_tests.cpp +++ b/test/csv/src/csv_encoder_tests.cpp @@ -9,7 +9,7 @@ namespace csv = jsoncons::csv; TEST_CASE("test json to flat csv") { -#if 0 +//#if 0 SECTION("array of objects to csv") { std::string expected = R"(boolean,datetime,float,text @@ -61,6 +61,56 @@ true,1948-01-01T14:57:13,1.27,Chicago Sun-Times CHECK(expected == buf); } + SECTION("array of objects with some missing members to csv") + { + std::string expected = R"(boolean,datetime,float,text +true,1971-01-01T04:14:00,1.0,Chicago Reader +true,1948-01-01T14:57:13,,Chicago Sun-Times +)"; + + std::string jtext = R"( +[ + { + "text": "Chicago Reader", + "float": 1.0, + "datetime": "1971-01-01T04:14:00", + "boolean": true, + "nested": { + "time": "04:14:00", + "nested": { + "date": "1971-01-01", + "integer": 40 + } + } + }, + { + "text": "Chicago Sun-Times", + "datetime": "1948-01-01T14:57:13", + "boolean": true, + "nested": { + "time": "14:57:13", + "nested": { + "date": "1948-01-01", + "integer": 63 + } + } + } +] + )"; + + + auto j = jsoncons::json::parse(jtext); + //std::cout << pretty_print(j) << "\n"; + + std::string buf; + csv::csv_string_encoder encoder(buf); + j.dump(encoder); + + //std::cout << buf << "\n"; + + CHECK(expected == buf); + } + SECTION("array of arrays to csv") { std::string expected = R"(Chicago Reader,1.0,1971-01-01T04:14:00,true @@ -147,15 +197,13 @@ NY,LON,TOR;LON CHECK(expected == buf); } -#endif - +//#endif SECTION("object of arrays and subarrays to csv") { - std::string expected = R"(calculationPeriodCenters,paymentCenters,resetCenters -NY;LON,TOR,LON -NY,LON,TOR;LON -NY;LON,TOR,LON -NY,LON,TOR;LON + std::string expected = R"(a,b,c +1;true;null,7;8;9,15 +-4;5.5;6,10;11;12,16 +,,17 )"; const std::string jtext = R"( @@ -176,14 +224,14 @@ NY,LON,TOR;LON csv::csv_string_encoder encoder(buf, options); j.dump(encoder); - std::cout << buf << "\n"; - //CHECK(expected == buf); + //std::cout << buf << "\n"; + CHECK(expected == buf); } } TEST_CASE("test json to non-flat csv") { -#if 0 +//#if 0 SECTION("array of objects to csv") { std::string expected = R"(/boolean,/datetime,/float,/nested/nested/date,/nested/nested/integer,/nested/time,/text @@ -234,6 +282,55 @@ true,1948-01-01T14:57:13,1.27,1948-01-01,63,14:57:13,Chicago Sun-Times CHECK(expected == buf); } + + SECTION("array of objects with some missing members to csv") + { + std::string expected = R"(/boolean,/datetime,/float,/nested/nested/date,/nested/nested/integer,/nested/time,/text +true,1971-01-01T04:14:00,1.0,1971-01-01,40,04:14:00,Chicago Reader +true,,1.27,,63,14:57:13,Chicago Sun-Times +)"; + + std::string jtext = R"( +[ + { + "text": "Chicago Reader", + "float": 1.0, + "datetime": "1971-01-01T04:14:00", + "boolean": true, + "nested": { + "time": "04:14:00", + "nested": { + "date": "1971-01-01", + "integer": 40 + } + } + }, + { + "text": "Chicago Sun-Times", + "float": 1.27, + "boolean": true, + "nested": { + "time": "14:57:13", + "nested": { + "integer": 63 + } + } + } +] + )"; + + auto j = jsoncons::json::parse(jtext); + //std::cout << pretty_print(j) << "\n"; + + auto options = csv::csv_options{} + .flat(false); + + std::string buf; + csv::csv_string_encoder encoder(buf, options); + j.dump(encoder); + + CHECK(expected == buf); + } SECTION("array of arrays to csv") { @@ -325,6 +422,6 @@ NY,LON,TOR;LON CHECK(expected == buf); } -#endif +//#endif } From b3bb6ca23fe3a74549027660b6b91245dc1c601c Mon Sep 17 00:00:00 2001 From: Daniel Parker Date: Thu, 16 Jan 2025 12:42:09 -0500 Subject: [PATCH 30/79] csv encode --- include/jsoncons_ext/csv/csv_encoder.hpp | 7 ++----- include/jsoncons_ext/csv/csv_options.hpp | 17 ++++++----------- 2 files changed, 8 insertions(+), 16 deletions(-) diff --git a/include/jsoncons_ext/csv/csv_encoder.hpp b/include/jsoncons_ext/csv/csv_encoder.hpp index 93bf5348bf..a757cb3b45 100644 --- a/include/jsoncons_ext/csv/csv_encoder.hpp +++ b/include/jsoncons_ext/csv/csv_encoder.hpp @@ -103,7 +103,7 @@ class basic_csv_encoder final : public basic_json_visitor std::vector column_names_; std::unordered_map,std::equal_to,string_string_allocator_type> cname_value_map_; - std::size_t column_index_; + std::size_t column_index_{0}; std::vector row_counts_; string_type buffer_; string_type value_buffer_; @@ -112,8 +112,7 @@ class basic_csv_encoder final : public basic_json_visitor basic_csv_encoder(const basic_csv_encoder&) = delete; basic_csv_encoder& operator=(const basic_csv_encoder&) = delete; public: - basic_csv_encoder(Sink&& sink, - const Allocator& alloc = Allocator()) + basic_csv_encoder(Sink&& sink, const Allocator& alloc = Allocator()) : basic_csv_encoder(std::forward(sink), basic_csv_encode_options(), alloc) { } @@ -124,9 +123,7 @@ class basic_csv_encoder final : public basic_json_visitor : sink_(std::forward(sink)), options_(options), alloc_(alloc), - stack_(), fp_(options.float_format(), options.precision()), - column_index_(0), buffer_(alloc), value_buffer_(alloc) { diff --git a/include/jsoncons_ext/csv/csv_options.hpp b/include/jsoncons_ext/csv/csv_options.hpp index 65a917c4d7..8f1b174069 100644 --- a/include/jsoncons_ext/csv/csv_options.hpp +++ b/include/jsoncons_ext/csv/csv_options.hpp @@ -470,8 +470,8 @@ class basic_csv_decode_options : public virtual basic_csv_options_common bool unquoted_empty_value_is_null_:1; bool infer_types_:1; bool lossless_number_:1; - char_type comment_starter_; - csv_mapping_kind mapping_; + char_type comment_starter_{'\0'}; + csv_mapping_kind mapping_{}; std::size_t header_lines_{0}; std::size_t max_lines_{(std::numeric_limits::max)()}; string_type column_types_; @@ -487,9 +487,7 @@ class basic_csv_decode_options : public virtual basic_csv_options_common trim_trailing_inside_quotes_(false), unquoted_empty_value_is_null_(false), infer_types_(true), - lossless_number_(false), - comment_starter_('\0'), - mapping_() + lossless_number_(false) {} basic_csv_decode_options(const basic_csv_decode_options& other) = default; @@ -619,15 +617,12 @@ class basic_csv_encode_options : public virtual basic_csv_options_common using typename super_type::char_type; using typename super_type::string_type; private: - quote_style_kind quote_style_; - float_chars_format float_format_; - int8_t precision_; + quote_style_kind quote_style_{quote_style_kind::minimal}; + float_chars_format float_format_{float_chars_format::general}; + int8_t precision_{0}; string_type line_delimiter_; public: basic_csv_encode_options() - : quote_style_(quote_style_kind::minimal), - float_format_(float_chars_format::general), - precision_(0) { line_delimiter_.push_back('\n'); } From 92b2bca9c60d05682f1725b6f9218a74f3588c68 Mon Sep 17 00:00:00 2001 From: Daniel Parker Date: Thu, 16 Jan 2025 13:44:46 -0500 Subject: [PATCH 31/79] csv encode --- test/csv/src/csv_encoder_tests.cpp | 30 ++++++++++++++++++++++++++---- 1 file changed, 26 insertions(+), 4 deletions(-) diff --git a/test/csv/src/csv_encoder_tests.cpp b/test/csv/src/csv_encoder_tests.cpp index 2168c7e045..3fff8ad016 100644 --- a/test/csv/src/csv_encoder_tests.cpp +++ b/test/csv/src/csv_encoder_tests.cpp @@ -9,7 +9,7 @@ namespace csv = jsoncons::csv; TEST_CASE("test json to flat csv") { -//#if 0 +#if 0 SECTION("array of objects to csv") { std::string expected = R"(boolean,datetime,float,text @@ -197,7 +197,6 @@ NY,LON,TOR;LON CHECK(expected == buf); } -//#endif SECTION("object of arrays and subarrays to csv") { std::string expected = R"(a,b,c @@ -227,11 +226,34 @@ NY,LON,TOR;LON //std::cout << buf << "\n"; CHECK(expected == buf); } +#endif + SECTION("array of subarrays to csv") + { + const std::string jtext = R"( +[ + [[1,2,3],[4,5,6]], + [[7,8,9],[10,11,12]] +] + )"; + + auto j = jsoncons::json::parse(jtext); + //std::cout << pretty_print(j) << "\n"; + + auto options = csv::csv_options{} + .subfield_delimiter(';'); + + std::string buf; + csv::csv_string_encoder encoder(buf, options); + j.dump(encoder); + + std::cout << buf << "\n"; + //CHECK(expected == buf); + } } TEST_CASE("test json to non-flat csv") { -//#if 0 +#if 0 SECTION("array of objects to csv") { std::string expected = R"(/boolean,/datetime,/float,/nested/nested/date,/nested/nested/integer,/nested/time,/text @@ -422,6 +444,6 @@ NY,LON,TOR;LON CHECK(expected == buf); } -//#endif +#endif } From 28992bfc292bbf9b64ca53b27641cbc8dee0d0d5 Mon Sep 17 00:00:00 2001 From: Daniel Parker Date: Thu, 16 Jan 2025 16:16:09 -0500 Subject: [PATCH 32/79] csv encode --- include/jsoncons_ext/csv/csv_encoder.hpp | 9 +++++++-- test/csv/src/csv_encoder_tests.cpp | 12 ++++++------ 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/include/jsoncons_ext/csv/csv_encoder.hpp b/include/jsoncons_ext/csv/csv_encoder.hpp index a757cb3b45..49e7e6e681 100644 --- a/include/jsoncons_ext/csv/csv_encoder.hpp +++ b/include/jsoncons_ext/csv/csv_encoder.hpp @@ -102,6 +102,7 @@ class basic_csv_encoder final : public basic_json_visitor std::vector column_names_; std::unordered_map,std::equal_to,string_string_allocator_type> cname_value_map_; + std::unordered_map,std::equal_to,string_string_allocator_type> column_pointer_name_map_; std::size_t column_index_{0}; std::vector row_counts_; @@ -118,8 +119,8 @@ class basic_csv_encoder final : public basic_json_visitor } basic_csv_encoder(Sink&& sink, - const basic_csv_encode_options& options, - const Allocator& alloc = Allocator()) + const basic_csv_encode_options& options, + const Allocator& alloc = Allocator()) : sink_(std::forward(sink)), options_(options), alloc_(alloc), @@ -370,6 +371,7 @@ class basic_csv_encoder final : public basic_json_visitor if (options_.column_names().empty()) { column_names_.emplace_back(stack_.back().pathname_); + column_pointer_name_map_.emplace(stack_.back().pathname_, stack_.back().pathname_); } cname_value_map_[stack_.back().pathname_] = std::basic_string(); } @@ -550,6 +552,7 @@ class basic_csv_encoder final : public basic_json_visitor if (stack_[0].count_ == 0 && options_.column_names().empty()) { column_names_.emplace_back(stack_.back().pathname_); + column_pointer_name_map_.emplace(stack_.back().pathname_, stack_.back().pathname_); } } @@ -568,6 +571,7 @@ class basic_csv_encoder final : public basic_json_visitor column_names_.emplace_back(stack_.back().pathname_); } cname_value_map_[stack_.back().pathname_] = std::basic_string(); + column_pointer_name_map_.emplace(stack_.back().pathname_, stack_.back().pathname_); } auto it = cname_value_map_.find(stack_.back().pathname_); if (it != cname_value_map_.end()) @@ -649,6 +653,7 @@ class basic_csv_encoder final : public basic_json_visitor if (options_.column_names().empty()) { column_names_.emplace_back(stack_.back().pathname_); + column_pointer_name_map_.emplace(stack_.back().pathname_, stack_.back().pathname_); } cname_value_map_[stack_.back().pathname_] = std::basic_string(); } diff --git a/test/csv/src/csv_encoder_tests.cpp b/test/csv/src/csv_encoder_tests.cpp index 3fff8ad016..7cc752d6e9 100644 --- a/test/csv/src/csv_encoder_tests.cpp +++ b/test/csv/src/csv_encoder_tests.cpp @@ -9,7 +9,7 @@ namespace csv = jsoncons::csv; TEST_CASE("test json to flat csv") { -#if 0 +//#if 0 SECTION("array of objects to csv") { std::string expected = R"(boolean,datetime,float,text @@ -226,8 +226,8 @@ NY,LON,TOR;LON //std::cout << buf << "\n"; CHECK(expected == buf); } -#endif - SECTION("array of subarrays to csv") +//#endif + /*SECTION("array of subarrays to csv") { const std::string jtext = R"( [ @@ -248,12 +248,12 @@ NY,LON,TOR;LON std::cout << buf << "\n"; //CHECK(expected == buf); - } + }*/ } TEST_CASE("test json to non-flat csv") { -#if 0 +//#if 0 SECTION("array of objects to csv") { std::string expected = R"(/boolean,/datetime,/float,/nested/nested/date,/nested/nested/integer,/nested/time,/text @@ -444,6 +444,6 @@ NY,LON,TOR;LON CHECK(expected == buf); } -#endif +//#endif } From 8e15c2fb73d328c975e2ad82f50f89cf1e519027 Mon Sep 17 00:00:00 2001 From: Daniel Parker Date: Thu, 16 Jan 2025 16:38:52 -0500 Subject: [PATCH 33/79] csv encode --- include/jsoncons_ext/csv/csv_encoder.hpp | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/include/jsoncons_ext/csv/csv_encoder.hpp b/include/jsoncons_ext/csv/csv_encoder.hpp index 49e7e6e681..ea3ad75aef 100644 --- a/include/jsoncons_ext/csv/csv_encoder.hpp +++ b/include/jsoncons_ext/csv/csv_encoder.hpp @@ -76,6 +76,7 @@ class basic_csv_encoder final : public basic_json_visitor stack_item_kind item_kind_; std::size_t count_; std::string pathname_; + std::string pointer_; stack_item(stack_item_kind item_kind) noexcept : item_kind_(item_kind), pathname_{}, count_(0) @@ -101,6 +102,7 @@ class basic_csv_encoder final : public basic_json_visitor jsoncons::detail::write_double fp_; std::vector column_names_; + std::vector column_pointers_; std::unordered_map,std::equal_to,string_string_allocator_type> cname_value_map_; std::unordered_map,std::equal_to,string_string_allocator_type> column_pointer_name_map_; @@ -213,6 +215,7 @@ class basic_csv_encoder final : public basic_json_visitor else { stack_.back().pathname_ = stack_[stack_.size()-2].pathname_; + stack_.back().pointer_ = stack_[stack_.size()-2].pointer_; value_buffer_.clear(); stack_.emplace_back(stack_item_kind::multivalued_field); } @@ -251,8 +254,7 @@ class basic_csv_encoder final : public basic_json_visitor { sink_.push_back(options_.field_delimiter()); } - sink_.append(column_names_[i].data(), - column_names_[i].length()); + sink_.append(column_names_[i].data(), column_names_[i].length()); } sink_.append(options_.line_delimiter().data(), options_.line_delimiter().length()); @@ -371,6 +373,7 @@ class basic_csv_encoder final : public basic_json_visitor if (options_.column_names().empty()) { column_names_.emplace_back(stack_.back().pathname_); + column_pointers_.emplace_back(stack_.back().pathname_); column_pointer_name_map_.emplace(stack_.back().pathname_, stack_.back().pathname_); } cname_value_map_[stack_.back().pathname_] = std::basic_string(); @@ -510,9 +513,9 @@ class basic_csv_encoder final : public basic_json_visitor { case stack_item_kind::flat_object: { - //stack_.back().pathname_ = stack_[stack_.size()-2].pathname_; - //stack_.back().pathname_.push_back('/'); - //stack_.back().pathname_.append(std::string(name)); + stack_.back().pointer_ = stack_[stack_.size()-2].pointer_; + stack_.back().pointer_.push_back('/'); + stack_.back().pointer_.append(std::string(name)); stack_.back().pathname_ = std::string(name); break; } @@ -521,6 +524,9 @@ class basic_csv_encoder final : public basic_json_visitor stack_.back().pathname_ = stack_[stack_.size()-2].pathname_; stack_.back().pathname_.push_back('/'); stack_.back().pathname_.append(std::string(name)); + stack_.back().pointer_ = stack_[stack_.size()-2].pointer_; + stack_.back().pointer_.push_back('/'); + stack_.back().pointer_.append(std::string(name)); break; } case stack_item_kind::column_mapping: @@ -544,11 +550,15 @@ class basic_csv_encoder final : public basic_json_visitor void append_array_path_component() { - stack_.back().pathname_ = stack_[stack_.size()-2].pathname_; - stack_.back().pathname_.push_back('/'); buffer_.clear(); jsoncons::detail::from_integer(stack_.back().count_, buffer_); + + stack_.back().pathname_ = stack_[stack_.size()-2].pathname_; + stack_.back().pathname_.push_back('/'); stack_.back().pathname_.append(buffer_); + stack_.back().pointer_ = stack_[stack_.size()-2].pointer_; + stack_.back().pointer_.push_back('/'); + stack_.back().pointer_.append(buffer_); if (stack_[0].count_ == 0 && options_.column_names().empty()) { column_names_.emplace_back(stack_.back().pathname_); From e0652d00339a55b69ae210ba8994d77e42115bf1 Mon Sep 17 00:00:00 2001 From: Daniel Parker Date: Thu, 16 Jan 2025 19:37:39 -0500 Subject: [PATCH 34/79] csv encode --- include/jsoncons_ext/csv/csv_encoder.hpp | 10 ++++++++-- test/csv/src/csv_encoder_tests.cpp | 11 ++++++----- 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/include/jsoncons_ext/csv/csv_encoder.hpp b/include/jsoncons_ext/csv/csv_encoder.hpp index 6e1a3ab7e9..64e7a89679 100644 --- a/include/jsoncons_ext/csv/csv_encoder.hpp +++ b/include/jsoncons_ext/csv/csv_encoder.hpp @@ -518,6 +518,10 @@ class basic_csv_encoder final : public basic_json_visitor stack_.back().pointer_.push_back('/'); stack_.back().pointer_.append(std::string(name)); stack_.back().pathname_ = std::string(name); + if (options_.column_names().empty()) + { + column_pointer_name_map_.emplace(stack_.back().pathname_, name); + } break; } case stack_item_kind::object: @@ -528,6 +532,10 @@ class basic_csv_encoder final : public basic_json_visitor stack_.back().pointer_ = stack_[stack_.size()-2].pointer_; stack_.back().pointer_.push_back('/'); stack_.back().pointer_.append(std::string(name)); + if (options_.column_names().empty()) + { + column_pointer_name_map_.emplace(stack_.back().pathname_, stack_.back().pathname_); + } break; } case stack_item_kind::column_mapping: @@ -582,7 +590,6 @@ class basic_csv_encoder final : public basic_json_visitor column_names_.emplace_back(stack_.back().pathname_); } cname_value_map_[stack_.back().pathname_] = std::basic_string(); - column_pointer_name_map_.emplace(stack_.back().pathname_, stack_.back().pathname_); } auto it = cname_value_map_.find(stack_.back().pathname_); if (it != cname_value_map_.end()) @@ -664,7 +671,6 @@ class basic_csv_encoder final : public basic_json_visitor if (options_.column_names().empty()) { column_names_.emplace_back(stack_.back().pathname_); - column_pointer_name_map_.emplace(stack_.back().pathname_, stack_.back().pathname_); } cname_value_map_[stack_.back().pathname_] = std::basic_string(); } diff --git a/test/csv/src/csv_encoder_tests.cpp b/test/csv/src/csv_encoder_tests.cpp index 7cc752d6e9..3897983555 100644 --- a/test/csv/src/csv_encoder_tests.cpp +++ b/test/csv/src/csv_encoder_tests.cpp @@ -1,4 +1,4 @@ -// Copyright 2013-2024 Daniel Parker +// Copyright 2013-2025 Daniel Parker // Distributed under Boost license #include @@ -9,7 +9,6 @@ namespace csv = jsoncons::csv; TEST_CASE("test json to flat csv") { -//#if 0 SECTION("array of objects to csv") { std::string expected = R"(boolean,datetime,float,text @@ -61,6 +60,8 @@ true,1948-01-01T14:57:13,1.27,Chicago Sun-Times CHECK(expected == buf); } +#if 0 + SECTION("array of objects with some missing members to csv") { std::string expected = R"(boolean,datetime,float,text @@ -226,7 +227,7 @@ NY,LON,TOR;LON //std::cout << buf << "\n"; CHECK(expected == buf); } -//#endif +#endif /*SECTION("array of subarrays to csv") { const std::string jtext = R"( @@ -253,7 +254,7 @@ NY,LON,TOR;LON TEST_CASE("test json to non-flat csv") { -//#if 0 +#if 0 SECTION("array of objects to csv") { std::string expected = R"(/boolean,/datetime,/float,/nested/nested/date,/nested/nested/integer,/nested/time,/text @@ -444,6 +445,6 @@ NY,LON,TOR;LON CHECK(expected == buf); } -//#endif +#endif } From c390ce1963f6340c9918607f6e79e6f587ba86b4 Mon Sep 17 00:00:00 2001 From: Daniel Parker Date: Thu, 16 Jan 2025 19:55:35 -0500 Subject: [PATCH 35/79] csv encode --- include/jsoncons_ext/csv/csv_encoder.hpp | 28 +++++++++++++++++------- 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/include/jsoncons_ext/csv/csv_encoder.hpp b/include/jsoncons_ext/csv/csv_encoder.hpp index 64e7a89679..508e56e537 100644 --- a/include/jsoncons_ext/csv/csv_encoder.hpp +++ b/include/jsoncons_ext/csv/csv_encoder.hpp @@ -247,26 +247,31 @@ class basic_csv_encoder final : public basic_json_visitor case stack_item_kind::object: if (stack_[stack_.size()-2].item_kind_ == stack_item_kind::row_mapping || stack_[stack_.size()-2].item_kind_ == stack_item_kind::flat_row_mapping) { + std::size_t col = 0; if (stack_[0].count_ == 0) { - for (std::size_t i = 0; i < column_names_.size(); ++i) + for (std::size_t i = 0; i < column_pointers_.size(); ++i) { - if (i > 0) + auto it = column_pointer_name_map_.find(column_pointers_[i]); + if (it != column_pointer_name_map_.end()) { - sink_.push_back(options_.field_delimiter()); + if (col > 0) + { + sink_.push_back(options_.field_delimiter()); + } + sink_.append(it->second.data(), it->second.length()); + ++col; } - sink_.append(column_names_[i].data(), column_names_[i].length()); } - sink_.append(options_.line_delimiter().data(), - options_.line_delimiter().length()); + sink_.append(options_.line_delimiter().data(), options_.line_delimiter().length()); } - for (std::size_t i = 0; i < column_names_.size(); ++i) + for (std::size_t i = 0; i < column_pointers_.size(); ++i) { if (i > 0) { sink_.push_back(options_.field_delimiter()); } - auto it = cname_value_map_.find(column_names_[i]); + auto it = cname_value_map_.find(column_pointers_[i]); if (it != cname_value_map_.end()) { sink_.append(it->second.data(),it->second.length()); @@ -571,6 +576,7 @@ class basic_csv_encoder final : public basic_json_visitor if (stack_[0].count_ == 0 && options_.column_names().empty()) { column_names_.emplace_back(stack_.back().pathname_); + column_pointers_.emplace_back(stack_.back().pathname_); column_pointer_name_map_.emplace(stack_.back().pathname_, stack_.back().pathname_); } } @@ -588,6 +594,7 @@ class basic_csv_encoder final : public basic_json_visitor if (options_.column_names().empty()) { column_names_.emplace_back(stack_.back().pathname_); + column_pointers_.emplace_back(stack_.back().pathname_); } cname_value_map_[stack_.back().pathname_] = std::basic_string(); } @@ -671,6 +678,7 @@ class basic_csv_encoder final : public basic_json_visitor if (options_.column_names().empty()) { column_names_.emplace_back(stack_.back().pathname_); + column_pointers_.emplace_back(stack_.back().pathname_); } cname_value_map_[stack_.back().pathname_] = std::basic_string(); } @@ -807,6 +815,7 @@ class basic_csv_encoder final : public basic_json_visitor if (options_.column_names().empty()) { column_names_.emplace_back(stack_.back().pathname_); + column_pointers_.emplace_back(stack_.back().pathname_); } cname_value_map_[stack_.back().pathname_] = std::basic_string(); } @@ -892,6 +901,7 @@ class basic_csv_encoder final : public basic_json_visitor if (options_.column_names().empty()) { column_names_.emplace_back(stack_.back().pathname_); + column_pointers_.emplace_back(stack_.back().pathname_); } cname_value_map_[stack_.back().pathname_] = std::basic_string(); } @@ -977,6 +987,7 @@ class basic_csv_encoder final : public basic_json_visitor if (options_.column_names().empty()) { column_names_.emplace_back(stack_.back().pathname_); + column_pointers_.emplace_back(stack_.back().pathname_); } cname_value_map_[stack_.back().pathname_] = std::basic_string(); } @@ -1059,6 +1070,7 @@ class basic_csv_encoder final : public basic_json_visitor if (options_.column_names().empty()) { column_names_.emplace_back(stack_.back().pathname_); + column_pointers_.emplace_back(stack_.back().pathname_); } cname_value_map_[stack_.back().pathname_] = std::basic_string(); } From 1e1333e349fac56bbe345979b2b5594fe7e573da Mon Sep 17 00:00:00 2001 From: Daniel Parker Date: Thu, 16 Jan 2025 20:10:08 -0500 Subject: [PATCH 36/79] csv encode --- include/jsoncons_ext/csv/csv_encoder.hpp | 139 +++++++++++------------ 1 file changed, 67 insertions(+), 72 deletions(-) diff --git a/include/jsoncons_ext/csv/csv_encoder.hpp b/include/jsoncons_ext/csv/csv_encoder.hpp index 508e56e537..da5cfc82e1 100644 --- a/include/jsoncons_ext/csv/csv_encoder.hpp +++ b/include/jsoncons_ext/csv/csv_encoder.hpp @@ -75,12 +75,12 @@ class basic_csv_encoder final : public basic_json_visitor struct stack_item { stack_item_kind item_kind_; - std::size_t count_; + std::size_t count_{0}; std::string pathname_; std::string pointer_; stack_item(stack_item_kind item_kind) noexcept - : item_kind_(item_kind), pathname_{}, count_(0) + : item_kind_(item_kind) { } @@ -104,7 +104,7 @@ class basic_csv_encoder final : public basic_json_visitor std::vector column_names_; std::vector column_pointers_; - std::unordered_map,std::equal_to,string_string_allocator_type> cname_value_map_; + std::unordered_map,std::equal_to,string_string_allocator_type> column_pointer_value_map_; std::unordered_map,std::equal_to,string_string_allocator_type> column_pointer_name_map_; std::size_t column_index_{0}; @@ -149,7 +149,7 @@ class basic_csv_encoder final : public basic_json_visitor { stack_.clear(); column_names_.clear(); - cname_value_map_.clear(); + column_pointer_value_map_.clear(); column_index_ = 0; row_counts_.clear(); } @@ -271,8 +271,8 @@ class basic_csv_encoder final : public basic_json_visitor { sink_.push_back(options_.field_delimiter()); } - auto it = cname_value_map_.find(column_pointers_[i]); - if (it != cname_value_map_.end()) + auto it = column_pointer_value_map_.find(column_pointers_[i]); + if (it != column_pointer_value_map_.end()) { sink_.append(it->second.data(),it->second.length()); it->second.clear(); @@ -379,10 +379,9 @@ class basic_csv_encoder final : public basic_json_visitor if (options_.column_names().empty()) { column_names_.emplace_back(stack_.back().pathname_); - column_pointers_.emplace_back(stack_.back().pathname_); - column_pointer_name_map_.emplace(stack_.back().pathname_, stack_.back().pathname_); + column_pointers_.emplace_back(stack_.back().pointer_); } - cname_value_map_[stack_.back().pathname_] = std::basic_string(); + column_pointer_value_map_[stack_.back().pointer_] = std::basic_string(); } value_buffer_.clear(); stack_.emplace_back(stack_item_kind::multivalued_field); @@ -433,14 +432,14 @@ class basic_csv_encoder final : public basic_json_visitor options_.line_delimiter().length()); } - for (std::size_t i = 0; i < column_names_.size(); ++i) + for (std::size_t i = 0; i < column_pointers_.size(); ++i) { if (i > 0) { sink_.push_back(options_.field_delimiter()); } - auto it = cname_value_map_.find(column_names_[i]); - if (it != cname_value_map_.end()) + auto it = column_pointer_value_map_.find(column_pointers_[i]); + if (it != column_pointer_value_map_.end()) { sink_.append(it->second.data(),it->second.length()); it->second.clear(); @@ -451,8 +450,8 @@ class basic_csv_encoder final : public basic_json_visitor break; case stack_item_kind::multivalued_field: { - auto it = cname_value_map_.find(stack_[stack_.size()-2].pathname_); - if (it != cname_value_map_.end()) + auto it = column_pointer_value_map_.find(stack_[stack_.size()-2].pointer_); + if (it != column_pointer_value_map_.end()) { it->second.append(value_buffer_.data(),value_buffer_.length()); } @@ -475,14 +474,14 @@ class basic_csv_encoder final : public basic_json_visitor options_.line_delimiter().length()); } - for (std::size_t i = 0; i < column_names_.size(); ++i) + for (std::size_t i = 0; i < column_pointers_.size(); ++i) { if (i > 0) { sink_.push_back(options_.field_delimiter()); } - auto it = cname_value_map_.find(column_names_[i]); - if (it != cname_value_map_.end()) + auto it = column_pointer_value_map_.find(column_pointers_[i]); + if (it != column_pointer_value_map_.end()) { sink_.append(it->second.data(),it->second.length()); it->second.clear(); @@ -522,24 +521,20 @@ class basic_csv_encoder final : public basic_json_visitor stack_.back().pointer_ = stack_[stack_.size()-2].pointer_; stack_.back().pointer_.push_back('/'); stack_.back().pointer_.append(std::string(name)); - stack_.back().pathname_ = std::string(name); if (options_.column_names().empty()) { - column_pointer_name_map_.emplace(stack_.back().pathname_, name); + column_pointer_name_map_.emplace(stack_.back().pointer_, name); } break; } case stack_item_kind::object: { - stack_.back().pathname_ = stack_[stack_.size()-2].pathname_; - stack_.back().pathname_.push_back('/'); - stack_.back().pathname_.append(std::string(name)); stack_.back().pointer_ = stack_[stack_.size()-2].pointer_; stack_.back().pointer_.push_back('/'); stack_.back().pointer_.append(std::string(name)); if (options_.column_names().empty()) { - column_pointer_name_map_.emplace(stack_.back().pathname_, stack_.back().pathname_); + column_pointer_name_map_.emplace(stack_.back().pointer_, stack_.back().pointer_); } break; } @@ -567,17 +562,17 @@ class basic_csv_encoder final : public basic_json_visitor buffer_.clear(); jsoncons::detail::from_integer(stack_.back().count_, buffer_); - stack_.back().pathname_ = stack_[stack_.size()-2].pathname_; - stack_.back().pathname_.push_back('/'); - stack_.back().pathname_.append(buffer_); + //stack_.back().pathname_ = stack_[stack_.size()-2].pathname_; + //stack_.back().pathname_.push_back('/'); + //stack_.back().pathname_.append(buffer_); stack_.back().pointer_ = stack_[stack_.size()-2].pointer_; stack_.back().pointer_.push_back('/'); stack_.back().pointer_.append(buffer_); if (stack_[0].count_ == 0 && options_.column_names().empty()) { - column_names_.emplace_back(stack_.back().pathname_); - column_pointers_.emplace_back(stack_.back().pathname_); - column_pointer_name_map_.emplace(stack_.back().pathname_, stack_.back().pathname_); + column_names_.emplace_back(stack_.back().pointer_); + column_pointers_.emplace_back(stack_.back().pointer_); + column_pointer_name_map_.emplace(stack_.back().pointer_, stack_.back().pointer_); } } @@ -594,12 +589,12 @@ class basic_csv_encoder final : public basic_json_visitor if (options_.column_names().empty()) { column_names_.emplace_back(stack_.back().pathname_); - column_pointers_.emplace_back(stack_.back().pathname_); + column_pointers_.emplace_back(stack_.back().pointer_); } - cname_value_map_[stack_.back().pathname_] = std::basic_string(); + column_pointer_value_map_[stack_.back().pointer_] = std::basic_string(); } - auto it = cname_value_map_.find(stack_.back().pathname_); - if (it != cname_value_map_.end()) + auto it = column_pointer_value_map_.find(stack_.back().pointer_); + if (it != column_pointer_value_map_.end()) { std::basic_string s; jsoncons::string_sink> bo(s); @@ -619,10 +614,10 @@ class basic_csv_encoder final : public basic_json_visitor append_array_path_component(); if (stack_[0].count_ == 0) { - cname_value_map_[stack_.back().pathname_] = std::basic_string(); + column_pointer_value_map_[stack_.back().pointer_] = std::basic_string(); } - auto it = cname_value_map_.find(stack_.back().pathname_); - if (it != cname_value_map_.end()) + auto it = column_pointer_value_map_.find(stack_.back().pointer_); + if (it != column_pointer_value_map_.end()) { std::basic_string s; jsoncons::string_sink> bo(s); @@ -678,18 +673,18 @@ class basic_csv_encoder final : public basic_json_visitor if (options_.column_names().empty()) { column_names_.emplace_back(stack_.back().pathname_); - column_pointers_.emplace_back(stack_.back().pathname_); + column_pointers_.emplace_back(stack_.back().pointer_); } - cname_value_map_[stack_.back().pathname_] = std::basic_string(); + column_pointer_value_map_[stack_.back().pointer_] = std::basic_string(); } - auto it = cname_value_map_.find(stack_.back().pathname_); - if (it != cname_value_map_.end()) + auto it = column_pointer_value_map_.find(stack_.back().pointer_); + if (it != column_pointer_value_map_.end()) { std::basic_string s; jsoncons::string_sink> bo(s); write_string_value(sv,bo); bo.flush(); - cname_value_map_[stack_.back().pathname_] = s; + column_pointer_value_map_[stack_.back().pointer_] = s; } break; } @@ -699,10 +694,10 @@ class basic_csv_encoder final : public basic_json_visitor append_array_path_component(); if (stack_[0].count_ == 0) { - cname_value_map_[stack_.back().pathname_] = std::basic_string(); + column_pointer_value_map_[stack_.back().pointer_] = std::basic_string(); } - auto it = cname_value_map_.find(stack_.back().pathname_); - if (it != cname_value_map_.end()) + auto it = column_pointer_value_map_.find(stack_.back().pointer_); + if (it != column_pointer_value_map_.end()) { std::basic_string s; jsoncons::string_sink> bo(s); @@ -815,12 +810,12 @@ class basic_csv_encoder final : public basic_json_visitor if (options_.column_names().empty()) { column_names_.emplace_back(stack_.back().pathname_); - column_pointers_.emplace_back(stack_.back().pathname_); + column_pointers_.emplace_back(stack_.back().pointer_); } - cname_value_map_[stack_.back().pathname_] = std::basic_string(); + column_pointer_value_map_[stack_.back().pointer_] = std::basic_string(); } - auto it = cname_value_map_.find(stack_.back().pathname_); - if (it != cname_value_map_.end()) + auto it = column_pointer_value_map_.find(stack_.back().pointer_); + if (it != column_pointer_value_map_.end()) { std::basic_string s; jsoncons::string_sink> bo(s); @@ -840,10 +835,10 @@ class basic_csv_encoder final : public basic_json_visitor append_array_path_component(); if (stack_[0].count_ == 0) { - cname_value_map_[stack_.back().pathname_] = std::basic_string(); + column_pointer_value_map_[stack_.back().pointer_] = std::basic_string(); } - auto it = cname_value_map_.find(stack_.back().pathname_); - if (it != cname_value_map_.end()) + auto it = column_pointer_value_map_.find(stack_.back().pointer_); + if (it != column_pointer_value_map_.end()) { std::basic_string s; jsoncons::string_sink> bo(s); @@ -901,12 +896,12 @@ class basic_csv_encoder final : public basic_json_visitor if (options_.column_names().empty()) { column_names_.emplace_back(stack_.back().pathname_); - column_pointers_.emplace_back(stack_.back().pathname_); + column_pointers_.emplace_back(stack_.back().pointer_); } - cname_value_map_[stack_.back().pathname_] = std::basic_string(); + column_pointer_value_map_[stack_.back().pointer_] = std::basic_string(); } - auto it = cname_value_map_.find(stack_.back().pathname_); - if (it != cname_value_map_.end()) + auto it = column_pointer_value_map_.find(stack_.back().pointer_); + if (it != column_pointer_value_map_.end()) { std::basic_string s; jsoncons::string_sink> bo(s); @@ -926,10 +921,10 @@ class basic_csv_encoder final : public basic_json_visitor append_array_path_component(); if (stack_[0].count_ == 0) { - cname_value_map_[stack_.back().pathname_] = std::basic_string(); + column_pointer_value_map_[stack_.back().pointer_] = std::basic_string(); } - auto it = cname_value_map_.find(stack_.back().pathname_); - if (it != cname_value_map_.end()) + auto it = column_pointer_value_map_.find(stack_.back().pointer_); + if (it != column_pointer_value_map_.end()) { std::basic_string s; jsoncons::string_sink> bo(s); @@ -987,12 +982,12 @@ class basic_csv_encoder final : public basic_json_visitor if (options_.column_names().empty()) { column_names_.emplace_back(stack_.back().pathname_); - column_pointers_.emplace_back(stack_.back().pathname_); + column_pointers_.emplace_back(stack_.back().pointer_); } - cname_value_map_[stack_.back().pathname_] = std::basic_string(); + column_pointer_value_map_[stack_.back().pointer_] = std::basic_string(); } - auto it = cname_value_map_.find(stack_.back().pathname_); - if (it != cname_value_map_.end()) + auto it = column_pointer_value_map_.find(stack_.back().pointer_); + if (it != column_pointer_value_map_.end()) { std::basic_string s; jsoncons::string_sink> bo(s); @@ -1012,10 +1007,10 @@ class basic_csv_encoder final : public basic_json_visitor append_array_path_component(); if (stack_[0].count_ == 0) { - cname_value_map_[stack_.back().pathname_] = std::basic_string(); + column_pointer_value_map_[stack_.back().pointer_] = std::basic_string(); } - auto it = cname_value_map_.find(stack_.back().pathname_); - if (it != cname_value_map_.end()) + auto it = column_pointer_value_map_.find(stack_.back().pointer_); + if (it != column_pointer_value_map_.end()) { std::basic_string s; jsoncons::string_sink> bo(s); @@ -1070,12 +1065,12 @@ class basic_csv_encoder final : public basic_json_visitor if (options_.column_names().empty()) { column_names_.emplace_back(stack_.back().pathname_); - column_pointers_.emplace_back(stack_.back().pathname_); + column_pointers_.emplace_back(stack_.back().pointer_); } - cname_value_map_[stack_.back().pathname_] = std::basic_string(); + column_pointer_value_map_[stack_.back().pointer_] = std::basic_string(); } - auto it = cname_value_map_.find(stack_.back().pathname_); - if (it != cname_value_map_.end()) + auto it = column_pointer_value_map_.find(stack_.back().pointer_); + if (it != column_pointer_value_map_.end()) { std::basic_string s; jsoncons::string_sink> bo(s); @@ -1095,10 +1090,10 @@ class basic_csv_encoder final : public basic_json_visitor append_array_path_component(); if (stack_[0].count_ == 0) { - cname_value_map_[stack_.back().pathname_] = std::basic_string(); + column_pointer_value_map_[stack_.back().pointer_] = std::basic_string(); } - auto it = cname_value_map_.find(stack_.back().pathname_); - if (it != cname_value_map_.end()) + auto it = column_pointer_value_map_.find(stack_.back().pointer_); + if (it != column_pointer_value_map_.end()) { std::basic_string s; jsoncons::string_sink> bo(s); From 6f5b421d4933c2789438645a7222343f4070408b Mon Sep 17 00:00:00 2001 From: Daniel Parker Date: Thu, 16 Jan 2025 20:48:46 -0500 Subject: [PATCH 37/79] csv encode --- include/jsoncons_ext/csv/csv_encoder.hpp | 28 ++++++++++++------------ test/csv/src/csv_encoder_tests.cpp | 6 +++-- 2 files changed, 18 insertions(+), 16 deletions(-) diff --git a/include/jsoncons_ext/csv/csv_encoder.hpp b/include/jsoncons_ext/csv/csv_encoder.hpp index da5cfc82e1..2c23628f34 100644 --- a/include/jsoncons_ext/csv/csv_encoder.hpp +++ b/include/jsoncons_ext/csv/csv_encoder.hpp @@ -247,9 +247,9 @@ class basic_csv_encoder final : public basic_json_visitor case stack_item_kind::object: if (stack_[stack_.size()-2].item_kind_ == stack_item_kind::row_mapping || stack_[stack_.size()-2].item_kind_ == stack_item_kind::flat_row_mapping) { - std::size_t col = 0; if (stack_[0].count_ == 0) { + std::size_t col = 0; for (std::size_t i = 0; i < column_pointers_.size(); ++i) { auto it = column_pointer_name_map_.find(column_pointers_[i]); @@ -283,7 +283,7 @@ class basic_csv_encoder final : public basic_json_visitor break; case stack_item_kind::column_mapping: { - for (const auto& item : column_names_) + for (const auto& item : column_pointers_) { sink_.append(item.data(), item.size()); sink_.append(options_.line_delimiter().data(), options_.line_delimiter().length()); @@ -378,7 +378,6 @@ class basic_csv_encoder final : public basic_json_visitor { if (options_.column_names().empty()) { - column_names_.emplace_back(stack_.back().pathname_); column_pointers_.emplace_back(stack_.back().pointer_); } column_pointer_value_map_[stack_.back().pointer_] = std::basic_string(); @@ -420,16 +419,21 @@ class basic_csv_encoder final : public basic_json_visitor { if (stack_[0].count_ == 0 && !options_.column_names().empty()) { - for (std::size_t i = 0; i < column_names_.size(); ++i) + std::size_t col = 0; + for (std::size_t i = 0; i < column_pointers_.size(); ++i) { - if (i > 0) + auto it = column_pointer_name_map_.find(column_pointers_[i]); + if (it != column_pointer_name_map_.end()) { - sink_.push_back(options_.field_delimiter()); + if (col > 0) + { + sink_.push_back(options_.field_delimiter()); + } + sink_.append(it->second.data(), it->second.length()); + ++col; } - sink_.append(column_names_[i].data(), column_names_[i].length()); } - sink_.append(options_.line_delimiter().data(), - options_.line_delimiter().length()); + sink_.append(options_.line_delimiter().data(), options_.line_delimiter().length()); } for (std::size_t i = 0; i < column_pointers_.size(); ++i) @@ -562,9 +566,6 @@ class basic_csv_encoder final : public basic_json_visitor buffer_.clear(); jsoncons::detail::from_integer(stack_.back().count_, buffer_); - //stack_.back().pathname_ = stack_[stack_.size()-2].pathname_; - //stack_.back().pathname_.push_back('/'); - //stack_.back().pathname_.append(buffer_); stack_.back().pointer_ = stack_[stack_.size()-2].pointer_; stack_.back().pointer_.push_back('/'); stack_.back().pointer_.append(buffer_); @@ -667,12 +668,11 @@ class basic_csv_encoder final : public basic_json_visitor case stack_item_kind::flat_object: case stack_item_kind::object: { - //stack_.back().pathname_ = stack_[stack_.size()-2].pathname_; if (stack_[0].count_ == 0) { if (options_.column_names().empty()) { - column_names_.emplace_back(stack_.back().pathname_); + column_names_.emplace_back(stack_.back().pointer_); column_pointers_.emplace_back(stack_.back().pointer_); } column_pointer_value_map_[stack_.back().pointer_] = std::basic_string(); diff --git a/test/csv/src/csv_encoder_tests.cpp b/test/csv/src/csv_encoder_tests.cpp index 3897983555..4e891d13d2 100644 --- a/test/csv/src/csv_encoder_tests.cpp +++ b/test/csv/src/csv_encoder_tests.cpp @@ -9,6 +9,7 @@ namespace csv = jsoncons::csv; TEST_CASE("test json to flat csv") { +#if 0 SECTION("array of objects to csv") { std::string expected = R"(boolean,datetime,float,text @@ -60,8 +61,6 @@ true,1948-01-01T14:57:13,1.27,Chicago Sun-Times CHECK(expected == buf); } -#if 0 - SECTION("array of objects with some missing members to csv") { std::string expected = R"(boolean,datetime,float,text @@ -112,6 +111,7 @@ true,1948-01-01T14:57:13,,Chicago Sun-Times CHECK(expected == buf); } +#endif SECTION("array of arrays to csv") { std::string expected = R"(Chicago Reader,1.0,1971-01-01T04:14:00,true @@ -159,6 +159,8 @@ Chicago Sun-Times,1.27,1948-01-01T14:57:13,true CHECK(expected == buf); } +#if 0 + SECTION("array of arrays and subarrays to csv") { std::string expected = R"(calculationPeriodCenters,paymentCenters,resetCenters From 6cd8feb81516b6218cb0d111d00afec0c1866401 Mon Sep 17 00:00:00 2001 From: Daniel Parker Date: Thu, 16 Jan 2025 21:07:42 -0500 Subject: [PATCH 38/79] csv encode --- test/csv/src/csv_encoder_tests.cpp | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/test/csv/src/csv_encoder_tests.cpp b/test/csv/src/csv_encoder_tests.cpp index 4e891d13d2..5a90019e50 100644 --- a/test/csv/src/csv_encoder_tests.cpp +++ b/test/csv/src/csv_encoder_tests.cpp @@ -111,7 +111,6 @@ true,1948-01-01T14:57:13,,Chicago Sun-Times CHECK(expected == buf); } -#endif SECTION("array of arrays to csv") { std::string expected = R"(Chicago Reader,1.0,1971-01-01T04:14:00,true @@ -159,7 +158,6 @@ Chicago Sun-Times,1.27,1948-01-01T14:57:13,true CHECK(expected == buf); } -#if 0 SECTION("array of arrays and subarrays to csv") { @@ -229,8 +227,8 @@ NY,LON,TOR;LON //std::cout << buf << "\n"; CHECK(expected == buf); } -#endif - /*SECTION("array of subarrays to csv") + + SECTION("array of subarrays to csv") { const std::string jtext = R"( [ @@ -251,12 +249,13 @@ NY,LON,TOR;LON std::cout << buf << "\n"; //CHECK(expected == buf); - }*/ + } +#endif } TEST_CASE("test json to non-flat csv") { -#if 0 +//#if 0 SECTION("array of objects to csv") { std::string expected = R"(/boolean,/datetime,/float,/nested/nested/date,/nested/nested/integer,/nested/time,/text @@ -447,6 +446,6 @@ NY,LON,TOR;LON CHECK(expected == buf); } -#endif +//#endif } From 8f27edecf37d28050173f99e8d8ef14247572be4 Mon Sep 17 00:00:00 2001 From: Daniel Parker Date: Thu, 16 Jan 2025 21:44:42 -0500 Subject: [PATCH 39/79] csv encode --- include/jsoncons_ext/csv/csv_encoder.hpp | 5 ++++- test/csv/src/csv_encoder_tests.cpp | 6 ++++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/include/jsoncons_ext/csv/csv_encoder.hpp b/include/jsoncons_ext/csv/csv_encoder.hpp index 2c23628f34..89fdc37f19 100644 --- a/include/jsoncons_ext/csv/csv_encoder.hpp +++ b/include/jsoncons_ext/csv/csv_encoder.hpp @@ -573,7 +573,10 @@ class basic_csv_encoder final : public basic_json_visitor { column_names_.emplace_back(stack_.back().pointer_); column_pointers_.emplace_back(stack_.back().pointer_); - column_pointer_name_map_.emplace(stack_.back().pointer_, stack_.back().pointer_); + if (stack_.back().item_kind_ == stack_item_kind::row) + { + column_pointer_name_map_.emplace(stack_.back().pointer_, stack_.back().pointer_); + } } } diff --git a/test/csv/src/csv_encoder_tests.cpp b/test/csv/src/csv_encoder_tests.cpp index 5a90019e50..c46292fbf6 100644 --- a/test/csv/src/csv_encoder_tests.cpp +++ b/test/csv/src/csv_encoder_tests.cpp @@ -9,7 +9,7 @@ namespace csv = jsoncons::csv; TEST_CASE("test json to flat csv") { -#if 0 +//#if 0 SECTION("array of objects to csv") { std::string expected = R"(boolean,datetime,float,text @@ -197,7 +197,9 @@ NY,LON,TOR;LON j.dump(encoder); CHECK(expected == buf); - } + } +//#endif +#if 0 SECTION("object of arrays and subarrays to csv") { std::string expected = R"(a,b,c From d3fb27761c6c076ad44b52b214aaf043422ebee2 Mon Sep 17 00:00:00 2001 From: Daniel Parker Date: Fri, 17 Jan 2025 11:14:52 -0500 Subject: [PATCH 40/79] csv encode columns --- include/jsoncons_ext/csv/csv_encoder.hpp | 68 ++++++++++++------------ test/csv/src/csv_encoder_tests.cpp | 15 +++--- 2 files changed, 41 insertions(+), 42 deletions(-) diff --git a/include/jsoncons_ext/csv/csv_encoder.hpp b/include/jsoncons_ext/csv/csv_encoder.hpp index 89fdc37f19..be4aa935c7 100644 --- a/include/jsoncons_ext/csv/csv_encoder.hpp +++ b/include/jsoncons_ext/csv/csv_encoder.hpp @@ -61,12 +61,12 @@ class basic_csv_encoder final : public basic_json_visitor { flat_row_mapping, row_mapping, - column_mapping, flat_object, flat_row, unmapped, object, row, + column_mapping, column, multivalued_field, column_multivalued_field @@ -336,18 +336,18 @@ class basic_csv_encoder final : public basic_json_visitor case stack_item_kind::column_mapping: stack_.emplace_back(stack_item_kind::column); row_counts_.push_back(1); - if (column_names_.size() <= row_counts_.back()) + if (column_pointers_.size() <= row_counts_.back()) { - column_names_.emplace_back(); + column_pointers_.emplace_back(); } break; case stack_item_kind::column: { - if (column_names_.size() <= row_counts_.back()) + if (column_pointers_.size() <= row_counts_.back()) { - column_names_.emplace_back(); + column_pointers_.emplace_back(); } - jsoncons::string_sink> bo(column_names_[row_counts_.back()]); + jsoncons::string_sink> bo(column_pointers_[row_counts_.back()]); begin_value(bo); stack_.emplace_back(stack_item_kind::column_multivalued_field); break; @@ -544,14 +544,14 @@ class basic_csv_encoder final : public basic_json_visitor } case stack_item_kind::column_mapping: { - if (column_names_.empty()) + if (column_pointers_.empty()) { - column_names_.emplace_back(name); + column_pointers_.emplace_back(name); } else { - column_names_[0].push_back(options_.field_delimiter()); - column_names_[0].append(string_type(name)); + column_pointers_[0].push_back(options_.field_delimiter()); + column_pointers_[0].append(string_type(name)); } break; } @@ -643,17 +643,17 @@ class basic_csv_encoder final : public basic_json_visitor } case stack_item_kind::column: { - if (column_names_.size() <= row_counts_.back()) + if (column_pointers_.size() <= row_counts_.back()) { - column_names_.emplace_back(); + column_pointers_.emplace_back(); } - jsoncons::string_sink> bo(column_names_[row_counts_.back()]); + jsoncons::string_sink> bo(column_pointers_[row_counts_.back()]); write_null_value(bo); break; } case stack_item_kind::column_multivalued_field: { - jsoncons::string_sink> bo(column_names_[row_counts_.back()]); + jsoncons::string_sink> bo(column_pointers_[row_counts_.back()]); write_null_value(bo); break; } @@ -722,17 +722,17 @@ class basic_csv_encoder final : public basic_json_visitor } case stack_item_kind::column: { - if (column_names_.size() <= row_counts_.back()) + if (column_pointers_.size() <= row_counts_.back()) { - column_names_.emplace_back(); + column_pointers_.emplace_back(); } - jsoncons::string_sink> bo(column_names_[row_counts_.back()]); + jsoncons::string_sink> bo(column_pointers_[row_counts_.back()]); write_string_value(sv,bo); break; } case stack_item_kind::column_multivalued_field: { - jsoncons::string_sink> bo(column_names_[row_counts_.back()]); + jsoncons::string_sink> bo(column_pointers_[row_counts_.back()]); write_string_value(sv,bo); break; } @@ -863,17 +863,17 @@ class basic_csv_encoder final : public basic_json_visitor } case stack_item_kind::column: { - if (column_names_.size() <= row_counts_.back()) + if (column_pointers_.size() <= row_counts_.back()) { - column_names_.emplace_back(); + column_pointers_.emplace_back(); } - jsoncons::string_sink> bo(column_names_[row_counts_.back()]); + jsoncons::string_sink> bo(column_pointers_[row_counts_.back()]); write_double_value(val, context, bo, ec); break; } case stack_item_kind::column_multivalued_field: { - jsoncons::string_sink> bo(column_names_[row_counts_.back()]); + jsoncons::string_sink> bo(column_pointers_[row_counts_.back()]); write_double_value(val, context, bo, ec); break; } @@ -949,17 +949,17 @@ class basic_csv_encoder final : public basic_json_visitor } case stack_item_kind::column: { - if (column_names_.size() <= row_counts_.back()) + if (column_pointers_.size() <= row_counts_.back()) { - column_names_.emplace_back(); + column_pointers_.emplace_back(); } - jsoncons::string_sink> bo(column_names_[row_counts_.back()]); + jsoncons::string_sink> bo(column_pointers_[row_counts_.back()]); write_int64_value(val, bo); break; } case stack_item_kind::column_multivalued_field: { - jsoncons::string_sink> bo(column_names_[row_counts_.back()]); + jsoncons::string_sink> bo(column_pointers_[row_counts_.back()]); write_int64_value(val, bo); break; } @@ -1035,17 +1035,17 @@ class basic_csv_encoder final : public basic_json_visitor } case stack_item_kind::column: { - if (column_names_.size() <= row_counts_.back()) + if (column_pointers_.size() <= row_counts_.back()) { - column_names_.emplace_back(); + column_pointers_.emplace_back(); } - jsoncons::string_sink> bo(column_names_[row_counts_.back()]); + jsoncons::string_sink> bo(column_pointers_[row_counts_.back()]); write_uint64_value(val, bo); break; } case stack_item_kind::column_multivalued_field: { - jsoncons::string_sink> bo(column_names_[row_counts_.back()]); + jsoncons::string_sink> bo(column_pointers_[row_counts_.back()]); write_uint64_value(val, bo); break; } @@ -1118,17 +1118,17 @@ class basic_csv_encoder final : public basic_json_visitor } case stack_item_kind::column: { - if (column_names_.size() <= row_counts_.back()) + if (column_pointers_.size() <= row_counts_.back()) { - column_names_.emplace_back(); + column_pointers_.emplace_back(); } - jsoncons::string_sink> bo(column_names_[row_counts_.back()]); + jsoncons::string_sink> bo(column_pointers_[row_counts_.back()]); write_bool_value(val, bo); break; } case stack_item_kind::column_multivalued_field: { - jsoncons::string_sink> bo(column_names_[row_counts_.back()]); + jsoncons::string_sink> bo(column_pointers_[row_counts_.back()]); write_bool_value(val, bo); break; } diff --git a/test/csv/src/csv_encoder_tests.cpp b/test/csv/src/csv_encoder_tests.cpp index c46292fbf6..e6831fab26 100644 --- a/test/csv/src/csv_encoder_tests.cpp +++ b/test/csv/src/csv_encoder_tests.cpp @@ -9,7 +9,7 @@ namespace csv = jsoncons::csv; TEST_CASE("test json to flat csv") { -//#if 0 +#if 0 SECTION("array of objects to csv") { std::string expected = R"(boolean,datetime,float,text @@ -198,8 +198,7 @@ NY,LON,TOR;LON CHECK(expected == buf); } -//#endif -#if 0 +#endif SECTION("object of arrays and subarrays to csv") { std::string expected = R"(a,b,c @@ -229,7 +228,7 @@ NY,LON,TOR;LON //std::cout << buf << "\n"; CHECK(expected == buf); } - +/* SECTION("array of subarrays to csv") { const std::string jtext = R"( @@ -251,13 +250,13 @@ NY,LON,TOR;LON std::cout << buf << "\n"; //CHECK(expected == buf); - } -#endif + } +*/ } TEST_CASE("test json to non-flat csv") { -//#if 0 +#if 0 SECTION("array of objects to csv") { std::string expected = R"(/boolean,/datetime,/float,/nested/nested/date,/nested/nested/integer,/nested/time,/text @@ -448,6 +447,6 @@ NY,LON,TOR;LON CHECK(expected == buf); } -//#endif +#endif } From a235fdce7cfcdffbfbd0af30b7bda3bfab6a6c7d Mon Sep 17 00:00:00 2001 From: Daniel Parker Date: Fri, 17 Jan 2025 12:16:47 -0500 Subject: [PATCH 41/79] csv encode --- include/jsoncons_ext/csv/csv_encoder.hpp | 11 ++++++++++- test/csv/src/csv_encoder_tests.cpp | 16 ++++++++++------ 2 files changed, 20 insertions(+), 7 deletions(-) diff --git a/include/jsoncons_ext/csv/csv_encoder.hpp b/include/jsoncons_ext/csv/csv_encoder.hpp index be4aa935c7..47a0e18867 100644 --- a/include/jsoncons_ext/csv/csv_encoder.hpp +++ b/include/jsoncons_ext/csv/csv_encoder.hpp @@ -363,8 +363,17 @@ class basic_csv_encoder final : public basic_json_visitor else { append_array_path_component(); + if (stack_[0].count_ == 0) + { + //if (options_.column_names().empty()) + //{ + // column_pointers_.emplace_back(stack_.back().pointer_); + //} + column_pointer_value_map_[stack_.back().pointer_] = std::basic_string(); + } value_buffer_.clear(); stack_.emplace_back(stack_item_kind::multivalued_field); + stack_.back().pointer_ = stack_[stack_.size()-2].pointer_; } break; case stack_item_kind::flat_object: @@ -417,7 +426,7 @@ class basic_csv_encoder final : public basic_json_visitor case stack_item_kind::flat_row: if (stack_[stack_.size()-2].item_kind_ == stack_item_kind::flat_row_mapping) { - if (stack_[0].count_ == 0 && !options_.column_names().empty()) + if (stack_[0].count_ == 0 && !column_pointer_name_map_.empty()) { std::size_t col = 0; for (std::size_t i = 0; i < column_pointers_.size(); ++i) diff --git a/test/csv/src/csv_encoder_tests.cpp b/test/csv/src/csv_encoder_tests.cpp index e6831fab26..185ccae24b 100644 --- a/test/csv/src/csv_encoder_tests.cpp +++ b/test/csv/src/csv_encoder_tests.cpp @@ -9,7 +9,7 @@ namespace csv = jsoncons::csv; TEST_CASE("test json to flat csv") { -#if 0 +//#if 0 SECTION("array of objects to csv") { std::string expected = R"(boolean,datetime,float,text @@ -198,7 +198,7 @@ NY,LON,TOR;LON CHECK(expected == buf); } -#endif + SECTION("object of arrays and subarrays to csv") { std::string expected = R"(a,b,c @@ -228,9 +228,13 @@ NY,LON,TOR;LON //std::cout << buf << "\n"; CHECK(expected == buf); } -/* +//#endif + SECTION("array of subarrays to csv") { + std::string expected = R"(1;2;3,4;5;6 +7;8;9,10;11;12 +)"; const std::string jtext = R"( [ [[1,2,3],[4,5,6]], @@ -251,12 +255,12 @@ NY,LON,TOR;LON std::cout << buf << "\n"; //CHECK(expected == buf); } -*/ + } TEST_CASE("test json to non-flat csv") { -#if 0 +//#if 0 SECTION("array of objects to csv") { std::string expected = R"(/boolean,/datetime,/float,/nested/nested/date,/nested/nested/integer,/nested/time,/text @@ -447,6 +451,6 @@ NY,LON,TOR;LON CHECK(expected == buf); } -#endif +//#endif } From a4852823d237dd1624811d6dfac8b8d7ec8cbd2f Mon Sep 17 00:00:00 2001 From: Daniel Parker Date: Fri, 17 Jan 2025 12:18:27 -0500 Subject: [PATCH 42/79] csv encode --- test/csv/src/csv_encoder_tests.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/csv/src/csv_encoder_tests.cpp b/test/csv/src/csv_encoder_tests.cpp index 185ccae24b..ccf8fac2e7 100644 --- a/test/csv/src/csv_encoder_tests.cpp +++ b/test/csv/src/csv_encoder_tests.cpp @@ -252,8 +252,8 @@ NY,LON,TOR;LON csv::csv_string_encoder encoder(buf, options); j.dump(encoder); - std::cout << buf << "\n"; - //CHECK(expected == buf); + //std::cout << buf << "\n"; + CHECK(expected == buf); } } From d9c12e097569e0048c3dd388a92fbfb69e82c27e Mon Sep 17 00:00:00 2001 From: Daniel Parker Date: Fri, 17 Jan 2025 12:23:11 -0500 Subject: [PATCH 43/79] csv encode --- include/jsoncons_ext/csv/csv_encoder.hpp | 5 ----- 1 file changed, 5 deletions(-) diff --git a/include/jsoncons_ext/csv/csv_encoder.hpp b/include/jsoncons_ext/csv/csv_encoder.hpp index 47a0e18867..7ab1b389a0 100644 --- a/include/jsoncons_ext/csv/csv_encoder.hpp +++ b/include/jsoncons_ext/csv/csv_encoder.hpp @@ -365,15 +365,10 @@ class basic_csv_encoder final : public basic_json_visitor append_array_path_component(); if (stack_[0].count_ == 0) { - //if (options_.column_names().empty()) - //{ - // column_pointers_.emplace_back(stack_.back().pointer_); - //} column_pointer_value_map_[stack_.back().pointer_] = std::basic_string(); } value_buffer_.clear(); stack_.emplace_back(stack_item_kind::multivalued_field); - stack_.back().pointer_ = stack_[stack_.size()-2].pointer_; } break; case stack_item_kind::flat_object: From 2db5fb7d09d05e0931df7d7be508b7577471e0af Mon Sep 17 00:00:00 2001 From: Daniel Parker Date: Fri, 17 Jan 2025 20:47:20 -0500 Subject: [PATCH 44/79] csv_encoder sink->str --- include/jsoncons_ext/csv/csv_encoder.hpp | 184 ++++++++--------------- 1 file changed, 65 insertions(+), 119 deletions(-) diff --git a/include/jsoncons_ext/csv/csv_encoder.hpp b/include/jsoncons_ext/csv/csv_encoder.hpp index 7ab1b389a0..2ec0e54129 100644 --- a/include/jsoncons_ext/csv/csv_encoder.hpp +++ b/include/jsoncons_ext/csv/csv_encoder.hpp @@ -162,11 +162,10 @@ class basic_csv_encoder final : public basic_json_visitor private: - template void escape_string(const CharT* s, std::size_t length, CharT quote_char, CharT quote_escape_char, - AnyWriter& sink) + string_type& sink) { const CharT* begin = s; const CharT* end = s + length; @@ -347,8 +346,7 @@ class basic_csv_encoder final : public basic_json_visitor { column_pointers_.emplace_back(); } - jsoncons::string_sink> bo(column_pointers_[row_counts_.back()]); - begin_value(bo); + begin_value(column_pointers_[row_counts_.back()]); stack_.emplace_back(stack_item_kind::column_multivalued_field); break; } @@ -605,9 +603,7 @@ class basic_csv_encoder final : public basic_json_visitor if (it != column_pointer_value_map_.end()) { std::basic_string s; - jsoncons::string_sink> bo(s); - write_null_value(bo); - bo.flush(); + write_null_value(s); if (!it->second.empty() && options_.subfield_delimiter() != char_type()) { it->second.push_back(options_.subfield_delimiter()); @@ -628,9 +624,7 @@ class basic_csv_encoder final : public basic_json_visitor if (it != column_pointer_value_map_.end()) { std::basic_string s; - jsoncons::string_sink> bo(s); - write_null_value(bo); - bo.flush(); + write_null_value(s); it->second.append(s); } break; @@ -641,8 +635,7 @@ class basic_csv_encoder final : public basic_json_visitor { value_buffer_.push_back(options_.subfield_delimiter()); } - jsoncons::string_sink> bo(value_buffer_); - write_null_value(bo); + write_null_value(value_buffer_); break; } case stack_item_kind::column: @@ -651,14 +644,12 @@ class basic_csv_encoder final : public basic_json_visitor { column_pointers_.emplace_back(); } - jsoncons::string_sink> bo(column_pointers_[row_counts_.back()]); - write_null_value(bo); + write_null_value(column_pointers_[row_counts_.back()]); break; } case stack_item_kind::column_multivalued_field: { - jsoncons::string_sink> bo(column_pointers_[row_counts_.back()]); - write_null_value(bo); + write_null_value(column_pointers_[row_counts_.back()]); break; } default: @@ -687,11 +678,7 @@ class basic_csv_encoder final : public basic_json_visitor auto it = column_pointer_value_map_.find(stack_.back().pointer_); if (it != column_pointer_value_map_.end()) { - std::basic_string s; - jsoncons::string_sink> bo(s); - write_string_value(sv,bo); - bo.flush(); - column_pointer_value_map_[stack_.back().pointer_] = s; + write_string_value(sv, column_pointer_value_map_[stack_.back().pointer_]); } break; } @@ -707,9 +694,7 @@ class basic_csv_encoder final : public basic_json_visitor if (it != column_pointer_value_map_.end()) { std::basic_string s; - jsoncons::string_sink> bo(s); - write_string_value(sv,bo); - bo.flush(); + write_string_value(sv, s); it->second.append(s); } break; @@ -720,8 +705,7 @@ class basic_csv_encoder final : public basic_json_visitor { value_buffer_.push_back(options_.subfield_delimiter()); } - jsoncons::string_sink> bo(value_buffer_); - write_string_value(sv, bo); + write_string_value(sv, value_buffer_); break; } case stack_item_kind::column: @@ -730,14 +714,12 @@ class basic_csv_encoder final : public basic_json_visitor { column_pointers_.emplace_back(); } - jsoncons::string_sink> bo(column_pointers_[row_counts_.back()]); - write_string_value(sv,bo); + write_string_value(sv, column_pointers_[row_counts_.back()]); break; } case stack_item_kind::column_multivalued_field: { - jsoncons::string_sink> bo(column_pointers_[row_counts_.back()]); - write_string_value(sv,bo); + write_string_value(sv, column_pointers_[row_counts_.back()]); break; } default: @@ -825,9 +807,7 @@ class basic_csv_encoder final : public basic_json_visitor if (it != column_pointer_value_map_.end()) { std::basic_string s; - jsoncons::string_sink> bo(s); - write_double_value(val, context, bo, ec); - bo.flush(); + write_double_value(val, context, s, ec); if (!it->second.empty() && options_.subfield_delimiter() != char_type()) { it->second.push_back(options_.subfield_delimiter()); @@ -848,9 +828,7 @@ class basic_csv_encoder final : public basic_json_visitor if (it != column_pointer_value_map_.end()) { std::basic_string s; - jsoncons::string_sink> bo(s); - write_double_value(val, context, bo, ec); - bo.flush(); + write_double_value(val, context, s, ec); it->second.append(s); } break; @@ -861,8 +839,7 @@ class basic_csv_encoder final : public basic_json_visitor { value_buffer_.push_back(options_.subfield_delimiter()); } - jsoncons::string_sink> bo(value_buffer_); - write_double_value(val, context, bo, ec); + write_double_value(val, context, value_buffer_, ec); break; } case stack_item_kind::column: @@ -871,14 +848,12 @@ class basic_csv_encoder final : public basic_json_visitor { column_pointers_.emplace_back(); } - jsoncons::string_sink> bo(column_pointers_[row_counts_.back()]); - write_double_value(val, context, bo, ec); + write_double_value(val, context, column_pointers_[row_counts_.back()], ec); break; } case stack_item_kind::column_multivalued_field: { - jsoncons::string_sink> bo(column_pointers_[row_counts_.back()]); - write_double_value(val, context, bo, ec); + write_double_value(val, context, column_pointers_[row_counts_.back()], ec); break; } default: @@ -911,9 +886,7 @@ class basic_csv_encoder final : public basic_json_visitor if (it != column_pointer_value_map_.end()) { std::basic_string s; - jsoncons::string_sink> bo(s); - write_int64_value(val,bo); - bo.flush(); + write_int64_value(val, s); if (!it->second.empty() && options_.subfield_delimiter() != char_type()) { it->second.push_back(options_.subfield_delimiter()); @@ -934,9 +907,7 @@ class basic_csv_encoder final : public basic_json_visitor if (it != column_pointer_value_map_.end()) { std::basic_string s; - jsoncons::string_sink> bo(s); - write_int64_value(val,bo); - bo.flush(); + write_int64_value(val, s); it->second.append(s); } break; @@ -947,8 +918,7 @@ class basic_csv_encoder final : public basic_json_visitor { value_buffer_.push_back(options_.subfield_delimiter()); } - jsoncons::string_sink> bo(value_buffer_); - write_int64_value(val, bo); + write_int64_value(val, value_buffer_); break; } case stack_item_kind::column: @@ -957,14 +927,12 @@ class basic_csv_encoder final : public basic_json_visitor { column_pointers_.emplace_back(); } - jsoncons::string_sink> bo(column_pointers_[row_counts_.back()]); - write_int64_value(val, bo); + write_int64_value(val, column_pointers_[row_counts_.back()]); break; } case stack_item_kind::column_multivalued_field: { - jsoncons::string_sink> bo(column_pointers_[row_counts_.back()]); - write_int64_value(val, bo); + write_int64_value(val, column_pointers_[row_counts_.back()]); break; } default: @@ -997,9 +965,7 @@ class basic_csv_encoder final : public basic_json_visitor if (it != column_pointer_value_map_.end()) { std::basic_string s; - jsoncons::string_sink> bo(s); - write_uint64_value(val, bo); - bo.flush(); + write_uint64_value(val, s); if (!it->second.empty() && options_.subfield_delimiter() != char_type()) { it->second.push_back(options_.subfield_delimiter()); @@ -1020,9 +986,7 @@ class basic_csv_encoder final : public basic_json_visitor if (it != column_pointer_value_map_.end()) { std::basic_string s; - jsoncons::string_sink> bo(s); - write_uint64_value(val, bo); - bo.flush(); + write_uint64_value(val, s); it->second.append(s); } break; @@ -1033,8 +997,7 @@ class basic_csv_encoder final : public basic_json_visitor { value_buffer_.push_back(options_.subfield_delimiter()); } - jsoncons::string_sink> bo(value_buffer_); - write_uint64_value(val, bo); + write_uint64_value(val, value_buffer_); break; } case stack_item_kind::column: @@ -1043,14 +1006,12 @@ class basic_csv_encoder final : public basic_json_visitor { column_pointers_.emplace_back(); } - jsoncons::string_sink> bo(column_pointers_[row_counts_.back()]); - write_uint64_value(val, bo); + write_uint64_value(val, column_pointers_[row_counts_.back()]); break; } case stack_item_kind::column_multivalued_field: { - jsoncons::string_sink> bo(column_pointers_[row_counts_.back()]); - write_uint64_value(val, bo); + write_uint64_value(val, column_pointers_[row_counts_.back()]); break; } default: @@ -1080,9 +1041,7 @@ class basic_csv_encoder final : public basic_json_visitor if (it != column_pointer_value_map_.end()) { std::basic_string s; - jsoncons::string_sink> bo(s); - write_bool_value(val,bo); - bo.flush(); + write_bool_value(val, s); if (!it->second.empty() && options_.subfield_delimiter() != char_type()) { it->second.push_back(options_.subfield_delimiter()); @@ -1103,9 +1062,7 @@ class basic_csv_encoder final : public basic_json_visitor if (it != column_pointer_value_map_.end()) { std::basic_string s; - jsoncons::string_sink> bo(s); - write_bool_value(val,bo); - bo.flush(); + write_bool_value(val, s); it->second.append(s); } break; @@ -1116,8 +1073,7 @@ class basic_csv_encoder final : public basic_json_visitor { value_buffer_.push_back(options_.subfield_delimiter()); } - jsoncons::string_sink> bo(value_buffer_); - write_bool_value(val, bo); + write_bool_value(val, value_buffer_); break; } case stack_item_kind::column: @@ -1126,14 +1082,12 @@ class basic_csv_encoder final : public basic_json_visitor { column_pointers_.emplace_back(); } - jsoncons::string_sink> bo(column_pointers_[row_counts_.back()]); - write_bool_value(val, bo); + write_bool_value(val, column_pointers_[row_counts_.back()]); break; } case stack_item_kind::column_multivalued_field: { - jsoncons::string_sink> bo(column_pointers_[row_counts_.back()]); - write_bool_value(val, bo); + write_bool_value(val, column_pointers_[row_counts_.back()]); break; } default: @@ -1142,8 +1096,7 @@ class basic_csv_encoder final : public basic_json_visitor return true; } - template - bool do_string_value(const CharT* s, std::size_t length, AnyWriter& sink) + bool do_string_value(const CharT* s, std::size_t length, string_type& str) { bool quote = false; if (options_.quote_style() == quote_style_kind::all || options_.quote_style() == quote_style_kind::nonnumeric || @@ -1151,29 +1104,27 @@ class basic_csv_encoder final : public basic_json_visitor (std::char_traits::find(s, length, options_.field_delimiter()) != nullptr || std::char_traits::find(s, length, options_.quote_char()) != nullptr))) { quote = true; - sink.push_back(options_.quote_char()); + str.push_back(options_.quote_char()); } - escape_string(s, length, options_.quote_char(), options_.quote_escape_char(), sink); + escape_string(s, length, options_.quote_char(), options_.quote_escape_char(), str); if (quote) { - sink.push_back(options_.quote_char()); + str.push_back(options_.quote_char()); } return true; } - template - void write_string_value(const string_view_type& value, AnyWriter& sink) + void write_string_value(const string_view_type& value, string_type& str) { - begin_value(sink); - do_string_value(value.data(),value.length(),sink); + begin_value(str); + do_string_value(value.data(),value.length(), str); end_value(); } - template - void write_double_value(double val, const ser_context& context, AnyWriter& sink, std::error_code& ec) + void write_double_value(double val, const ser_context& context, string_type& str, std::error_code& ec) { - begin_value(sink); + begin_value(str); if (!std::isfinite(val)) { @@ -1181,7 +1132,7 @@ class basic_csv_encoder final : public basic_json_visitor { if (options_.enable_nan_to_num()) { - sink.append(options_.nan_to_num().data(), options_.nan_to_num().length()); + str.append(options_.nan_to_num().data(), options_.nan_to_num().length()); } else if (options_.enable_nan_to_str()) { @@ -1189,14 +1140,14 @@ class basic_csv_encoder final : public basic_json_visitor } else { - sink.append(null_constant().data(), null_constant().size()); + str.append(null_constant().data(), null_constant().size()); } } else if (val == std::numeric_limits::infinity()) { if (options_.enable_inf_to_num()) { - sink.append(options_.inf_to_num().data(), options_.inf_to_num().length()); + str.append(options_.inf_to_num().data(), options_.inf_to_num().length()); } else if (options_.enable_inf_to_str()) { @@ -1204,14 +1155,14 @@ class basic_csv_encoder final : public basic_json_visitor } else { - sink.append(null_constant().data(), null_constant().size()); + str.append(null_constant().data(), null_constant().size()); } } else { if (options_.enable_neginf_to_num()) { - sink.append(options_.neginf_to_num().data(), options_.neginf_to_num().length()); + str.append(options_.neginf_to_num().data(), options_.neginf_to_num().length()); } else if (options_.enable_neginf_to_str()) { @@ -1219,67 +1170,62 @@ class basic_csv_encoder final : public basic_json_visitor } else { - sink.append(null_constant().data(), null_constant().size()); + str.append(null_constant().data(), null_constant().size()); } } } else { - fp_(val, sink); + fp_(val, str); } end_value(); } - template - void write_int64_value(int64_t val, AnyWriter& sink) + void write_int64_value(int64_t val, string_type& str) { - begin_value(sink); + begin_value(str); - jsoncons::detail::from_integer(val,sink); + jsoncons::detail::from_integer(val,str); end_value(); } - template - void write_uint64_value(uint64_t val, AnyWriter& sink) + void write_uint64_value(uint64_t val, string_type& str) { - begin_value(sink); + begin_value(str); - jsoncons::detail::from_integer(val,sink); + jsoncons::detail::from_integer(val,str); end_value(); } - template - void write_bool_value(bool val, AnyWriter& sink) + void write_bool_value(bool val, string_type& str) { - begin_value(sink); + begin_value(str); if (val) { - sink.append(true_constant().data(), true_constant().size()); + str.append(true_constant().data(), true_constant().size()); } else { - sink.append(false_constant().data(), false_constant().size()); + str.append(false_constant().data(), false_constant().size()); } end_value(); } - template - bool write_null_value(AnyWriter& sink) + bool write_null_value(string_type& str) { - begin_value(sink); - sink.append(null_constant().data(), null_constant().size()); + begin_value(str); + str.append(null_constant().data(), null_constant().size()); end_value(); return true; } - template - void begin_value(AnyWriter& sink) + void begin_value(string_type& str) { if (stack_.empty()) { @@ -1298,7 +1244,7 @@ class basic_csv_encoder final : public basic_json_visitor { if (row_counts_[i] <= row_counts_.back()) { - sink.push_back(options_.field_delimiter()); + str.push_back(options_.field_delimiter()); } else { @@ -1308,7 +1254,7 @@ class basic_csv_encoder final : public basic_json_visitor } if (column_index_ > 0) { - sink.push_back(options_.field_delimiter()); + str.push_back(options_.field_delimiter()); } break; } @@ -1317,7 +1263,7 @@ class basic_csv_encoder final : public basic_json_visitor case stack_item_kind::column_multivalued_field: if (stack_.back().count_ > 0 && options_.subfield_delimiter() != char_type()) { - sink.push_back(options_.subfield_delimiter()); + str.push_back(options_.subfield_delimiter()); } break; default: From a44ddeceafea71d2b3f06b466e23dc16dcdc66df Mon Sep 17 00:00:00 2001 From: Daniel Parker Date: Fri, 17 Jan 2025 20:57:09 -0500 Subject: [PATCH 45/79] csv_encoder sink->str --- include/jsoncons_ext/csv/csv_encoder.hpp | 44 ++++++------------------ 1 file changed, 11 insertions(+), 33 deletions(-) diff --git a/include/jsoncons_ext/csv/csv_encoder.hpp b/include/jsoncons_ext/csv/csv_encoder.hpp index 2ec0e54129..a6fe251968 100644 --- a/include/jsoncons_ext/csv/csv_encoder.hpp +++ b/include/jsoncons_ext/csv/csv_encoder.hpp @@ -602,13 +602,11 @@ class basic_csv_encoder final : public basic_json_visitor auto it = column_pointer_value_map_.find(stack_.back().pointer_); if (it != column_pointer_value_map_.end()) { - std::basic_string s; - write_null_value(s); if (!it->second.empty() && options_.subfield_delimiter() != char_type()) { it->second.push_back(options_.subfield_delimiter()); } - it->second.append(s); + write_null_value(it->second); } break; } @@ -623,9 +621,7 @@ class basic_csv_encoder final : public basic_json_visitor auto it = column_pointer_value_map_.find(stack_.back().pointer_); if (it != column_pointer_value_map_.end()) { - std::basic_string s; - write_null_value(s); - it->second.append(s); + write_null_value(it->second); } break; } @@ -693,9 +689,7 @@ class basic_csv_encoder final : public basic_json_visitor auto it = column_pointer_value_map_.find(stack_.back().pointer_); if (it != column_pointer_value_map_.end()) { - std::basic_string s; - write_string_value(sv, s); - it->second.append(s); + write_string_value(sv, it->second); } break; } @@ -806,13 +800,11 @@ class basic_csv_encoder final : public basic_json_visitor auto it = column_pointer_value_map_.find(stack_.back().pointer_); if (it != column_pointer_value_map_.end()) { - std::basic_string s; - write_double_value(val, context, s, ec); if (!it->second.empty() && options_.subfield_delimiter() != char_type()) { it->second.push_back(options_.subfield_delimiter()); } - it->second.append(s); + write_double_value(val, context, it->second, ec); } break; } @@ -827,9 +819,7 @@ class basic_csv_encoder final : public basic_json_visitor auto it = column_pointer_value_map_.find(stack_.back().pointer_); if (it != column_pointer_value_map_.end()) { - std::basic_string s; - write_double_value(val, context, s, ec); - it->second.append(s); + write_double_value(val, context, it->second, ec); } break; } @@ -885,13 +875,11 @@ class basic_csv_encoder final : public basic_json_visitor auto it = column_pointer_value_map_.find(stack_.back().pointer_); if (it != column_pointer_value_map_.end()) { - std::basic_string s; - write_int64_value(val, s); if (!it->second.empty() && options_.subfield_delimiter() != char_type()) { it->second.push_back(options_.subfield_delimiter()); } - it->second.append(s); + write_int64_value(val, it->second); } break; } @@ -906,9 +894,7 @@ class basic_csv_encoder final : public basic_json_visitor auto it = column_pointer_value_map_.find(stack_.back().pointer_); if (it != column_pointer_value_map_.end()) { - std::basic_string s; - write_int64_value(val, s); - it->second.append(s); + write_int64_value(val, it->second); } break; } @@ -964,13 +950,11 @@ class basic_csv_encoder final : public basic_json_visitor auto it = column_pointer_value_map_.find(stack_.back().pointer_); if (it != column_pointer_value_map_.end()) { - std::basic_string s; - write_uint64_value(val, s); if (!it->second.empty() && options_.subfield_delimiter() != char_type()) { it->second.push_back(options_.subfield_delimiter()); } - it->second.append(s); + write_uint64_value(val, it->second); } break; } @@ -985,9 +969,7 @@ class basic_csv_encoder final : public basic_json_visitor auto it = column_pointer_value_map_.find(stack_.back().pointer_); if (it != column_pointer_value_map_.end()) { - std::basic_string s; - write_uint64_value(val, s); - it->second.append(s); + write_uint64_value(val, it->second); } break; } @@ -1040,13 +1022,11 @@ class basic_csv_encoder final : public basic_json_visitor auto it = column_pointer_value_map_.find(stack_.back().pointer_); if (it != column_pointer_value_map_.end()) { - std::basic_string s; - write_bool_value(val, s); if (!it->second.empty() && options_.subfield_delimiter() != char_type()) { it->second.push_back(options_.subfield_delimiter()); } - it->second.append(s); + write_bool_value(val, it->second); } break; } @@ -1061,9 +1041,7 @@ class basic_csv_encoder final : public basic_json_visitor auto it = column_pointer_value_map_.find(stack_.back().pointer_); if (it != column_pointer_value_map_.end()) { - std::basic_string s; - write_bool_value(val, s); - it->second.append(s); + write_bool_value(val, it->second); } break; } From 2e58676325675ffec9beb0a66f7d284a11591abc Mon Sep 17 00:00:00 2001 From: Daniel Parker Date: Sun, 19 Jan 2025 17:06:25 -0500 Subject: [PATCH 46/79] csv_options column_mapping --- include/jsoncons_ext/csv/csv_encoder.hpp | 238 +++++++++++------------ include/jsoncons_ext/csv/csv_options.hpp | 15 +- 2 files changed, 133 insertions(+), 120 deletions(-) diff --git a/include/jsoncons_ext/csv/csv_encoder.hpp b/include/jsoncons_ext/csv/csv_encoder.hpp index a6fe251968..3af223d37d 100644 --- a/include/jsoncons_ext/csv/csv_encoder.hpp +++ b/include/jsoncons_ext/csv/csv_encoder.hpp @@ -77,7 +77,7 @@ class basic_csv_encoder final : public basic_json_visitor stack_item_kind item_kind_; std::size_t count_{0}; std::string pathname_; - std::string pointer_; + std::string column_path_; stack_item(stack_item_kind item_kind) noexcept : item_kind_(item_kind) @@ -103,9 +103,9 @@ class basic_csv_encoder final : public basic_json_visitor jsoncons::detail::write_double fp_; std::vector column_names_; - std::vector column_pointers_; - std::unordered_map,std::equal_to,string_string_allocator_type> column_pointer_value_map_; - std::unordered_map,std::equal_to,string_string_allocator_type> column_pointer_name_map_; + std::vector column_paths_; + std::unordered_map,std::equal_to,string_string_allocator_type> column_path_value_map_; + std::unordered_map,std::equal_to,string_string_allocator_type> column_path_name_map_; std::size_t column_index_{0}; std::vector row_counts_; @@ -149,7 +149,7 @@ class basic_csv_encoder final : public basic_json_visitor { stack_.clear(); column_names_.clear(); - column_pointer_value_map_.clear(); + column_path_value_map_.clear(); column_index_ = 0; row_counts_.clear(); } @@ -215,7 +215,7 @@ class basic_csv_encoder final : public basic_json_visitor else { stack_.back().pathname_ = stack_[stack_.size()-2].pathname_; - stack_.back().pointer_ = stack_[stack_.size()-2].pointer_; + stack_.back().column_path_ = stack_[stack_.size()-2].column_path_; value_buffer_.clear(); stack_.emplace_back(stack_item_kind::multivalued_field); } @@ -249,10 +249,10 @@ class basic_csv_encoder final : public basic_json_visitor if (stack_[0].count_ == 0) { std::size_t col = 0; - for (std::size_t i = 0; i < column_pointers_.size(); ++i) + for (std::size_t i = 0; i < column_paths_.size(); ++i) { - auto it = column_pointer_name_map_.find(column_pointers_[i]); - if (it != column_pointer_name_map_.end()) + auto it = column_path_name_map_.find(column_paths_[i]); + if (it != column_path_name_map_.end()) { if (col > 0) { @@ -264,14 +264,14 @@ class basic_csv_encoder final : public basic_json_visitor } sink_.append(options_.line_delimiter().data(), options_.line_delimiter().length()); } - for (std::size_t i = 0; i < column_pointers_.size(); ++i) + for (std::size_t i = 0; i < column_paths_.size(); ++i) { if (i > 0) { sink_.push_back(options_.field_delimiter()); } - auto it = column_pointer_value_map_.find(column_pointers_[i]); - if (it != column_pointer_value_map_.end()) + auto it = column_path_value_map_.find(column_paths_[i]); + if (it != column_path_value_map_.end()) { sink_.append(it->second.data(),it->second.length()); it->second.clear(); @@ -282,7 +282,7 @@ class basic_csv_encoder final : public basic_json_visitor break; case stack_item_kind::column_mapping: { - for (const auto& item : column_pointers_) + for (const auto& item : column_paths_) { sink_.append(item.data(), item.size()); sink_.append(options_.line_delimiter().data(), options_.line_delimiter().length()); @@ -335,18 +335,18 @@ class basic_csv_encoder final : public basic_json_visitor case stack_item_kind::column_mapping: stack_.emplace_back(stack_item_kind::column); row_counts_.push_back(1); - if (column_pointers_.size() <= row_counts_.back()) + if (column_paths_.size() <= row_counts_.back()) { - column_pointers_.emplace_back(); + column_paths_.emplace_back(); } break; case stack_item_kind::column: { - if (column_pointers_.size() <= row_counts_.back()) + if (column_paths_.size() <= row_counts_.back()) { - column_pointers_.emplace_back(); + column_paths_.emplace_back(); } - begin_value(column_pointers_[row_counts_.back()]); + begin_value(column_paths_[row_counts_.back()]); stack_.emplace_back(stack_item_kind::column_multivalued_field); break; } @@ -363,7 +363,7 @@ class basic_csv_encoder final : public basic_json_visitor append_array_path_component(); if (stack_[0].count_ == 0) { - column_pointer_value_map_[stack_.back().pointer_] = std::basic_string(); + column_path_value_map_[stack_.back().column_path_] = std::basic_string(); } value_buffer_.clear(); stack_.emplace_back(stack_item_kind::multivalued_field); @@ -380,9 +380,9 @@ class basic_csv_encoder final : public basic_json_visitor { if (options_.column_names().empty()) { - column_pointers_.emplace_back(stack_.back().pointer_); + column_paths_.emplace_back(stack_.back().column_path_); } - column_pointer_value_map_[stack_.back().pointer_] = std::basic_string(); + column_path_value_map_[stack_.back().column_path_] = std::basic_string(); } value_buffer_.clear(); stack_.emplace_back(stack_item_kind::multivalued_field); @@ -419,13 +419,13 @@ class basic_csv_encoder final : public basic_json_visitor case stack_item_kind::flat_row: if (stack_[stack_.size()-2].item_kind_ == stack_item_kind::flat_row_mapping) { - if (stack_[0].count_ == 0 && !column_pointer_name_map_.empty()) + if (stack_[0].count_ == 0 && !column_path_name_map_.empty()) { std::size_t col = 0; - for (std::size_t i = 0; i < column_pointers_.size(); ++i) + for (std::size_t i = 0; i < column_paths_.size(); ++i) { - auto it = column_pointer_name_map_.find(column_pointers_[i]); - if (it != column_pointer_name_map_.end()) + auto it = column_path_name_map_.find(column_paths_[i]); + if (it != column_path_name_map_.end()) { if (col > 0) { @@ -438,14 +438,14 @@ class basic_csv_encoder final : public basic_json_visitor sink_.append(options_.line_delimiter().data(), options_.line_delimiter().length()); } - for (std::size_t i = 0; i < column_pointers_.size(); ++i) + for (std::size_t i = 0; i < column_paths_.size(); ++i) { if (i > 0) { sink_.push_back(options_.field_delimiter()); } - auto it = column_pointer_value_map_.find(column_pointers_[i]); - if (it != column_pointer_value_map_.end()) + auto it = column_path_value_map_.find(column_paths_[i]); + if (it != column_path_value_map_.end()) { sink_.append(it->second.data(),it->second.length()); it->second.clear(); @@ -456,8 +456,8 @@ class basic_csv_encoder final : public basic_json_visitor break; case stack_item_kind::multivalued_field: { - auto it = column_pointer_value_map_.find(stack_[stack_.size()-2].pointer_); - if (it != column_pointer_value_map_.end()) + auto it = column_path_value_map_.find(stack_[stack_.size()-2].column_path_); + if (it != column_path_value_map_.end()) { it->second.append(value_buffer_.data(),value_buffer_.length()); } @@ -480,14 +480,14 @@ class basic_csv_encoder final : public basic_json_visitor options_.line_delimiter().length()); } - for (std::size_t i = 0; i < column_pointers_.size(); ++i) + for (std::size_t i = 0; i < column_paths_.size(); ++i) { if (i > 0) { sink_.push_back(options_.field_delimiter()); } - auto it = column_pointer_value_map_.find(column_pointers_[i]); - if (it != column_pointer_value_map_.end()) + auto it = column_path_value_map_.find(column_paths_[i]); + if (it != column_path_value_map_.end()) { sink_.append(it->second.data(),it->second.length()); it->second.clear(); @@ -524,36 +524,36 @@ class basic_csv_encoder final : public basic_json_visitor { case stack_item_kind::flat_object: { - stack_.back().pointer_ = stack_[stack_.size()-2].pointer_; - stack_.back().pointer_.push_back('/'); - stack_.back().pointer_.append(std::string(name)); + stack_.back().column_path_ = stack_[stack_.size()-2].column_path_; + stack_.back().column_path_.push_back('/'); + stack_.back().column_path_.append(std::string(name)); if (options_.column_names().empty()) { - column_pointer_name_map_.emplace(stack_.back().pointer_, name); + column_path_name_map_.emplace(stack_.back().column_path_, name); } break; } case stack_item_kind::object: { - stack_.back().pointer_ = stack_[stack_.size()-2].pointer_; - stack_.back().pointer_.push_back('/'); - stack_.back().pointer_.append(std::string(name)); + stack_.back().column_path_ = stack_[stack_.size()-2].column_path_; + stack_.back().column_path_.push_back('/'); + stack_.back().column_path_.append(std::string(name)); if (options_.column_names().empty()) { - column_pointer_name_map_.emplace(stack_.back().pointer_, stack_.back().pointer_); + column_path_name_map_.emplace(stack_.back().column_path_, stack_.back().column_path_); } break; } case stack_item_kind::column_mapping: { - if (column_pointers_.empty()) + if (column_paths_.empty()) { - column_pointers_.emplace_back(name); + column_paths_.emplace_back(name); } else { - column_pointers_[0].push_back(options_.field_delimiter()); - column_pointers_[0].append(string_type(name)); + column_paths_[0].push_back(options_.field_delimiter()); + column_paths_[0].append(string_type(name)); } break; } @@ -568,16 +568,16 @@ class basic_csv_encoder final : public basic_json_visitor buffer_.clear(); jsoncons::detail::from_integer(stack_.back().count_, buffer_); - stack_.back().pointer_ = stack_[stack_.size()-2].pointer_; - stack_.back().pointer_.push_back('/'); - stack_.back().pointer_.append(buffer_); + stack_.back().column_path_ = stack_[stack_.size()-2].column_path_; + stack_.back().column_path_.push_back('/'); + stack_.back().column_path_.append(buffer_); if (stack_[0].count_ == 0 && options_.column_names().empty()) { - column_names_.emplace_back(stack_.back().pointer_); - column_pointers_.emplace_back(stack_.back().pointer_); + column_names_.emplace_back(stack_.back().column_path_); + column_paths_.emplace_back(stack_.back().column_path_); if (stack_.back().item_kind_ == stack_item_kind::row) { - column_pointer_name_map_.emplace(stack_.back().pointer_, stack_.back().pointer_); + column_path_name_map_.emplace(stack_.back().column_path_, stack_.back().column_path_); } } } @@ -595,12 +595,12 @@ class basic_csv_encoder final : public basic_json_visitor if (options_.column_names().empty()) { column_names_.emplace_back(stack_.back().pathname_); - column_pointers_.emplace_back(stack_.back().pointer_); + column_paths_.emplace_back(stack_.back().column_path_); } - column_pointer_value_map_[stack_.back().pointer_] = std::basic_string(); + column_path_value_map_[stack_.back().column_path_] = std::basic_string(); } - auto it = column_pointer_value_map_.find(stack_.back().pointer_); - if (it != column_pointer_value_map_.end()) + auto it = column_path_value_map_.find(stack_.back().column_path_); + if (it != column_path_value_map_.end()) { if (!it->second.empty() && options_.subfield_delimiter() != char_type()) { @@ -616,10 +616,10 @@ class basic_csv_encoder final : public basic_json_visitor append_array_path_component(); if (stack_[0].count_ == 0) { - column_pointer_value_map_[stack_.back().pointer_] = std::basic_string(); + column_path_value_map_[stack_.back().column_path_] = std::basic_string(); } - auto it = column_pointer_value_map_.find(stack_.back().pointer_); - if (it != column_pointer_value_map_.end()) + auto it = column_path_value_map_.find(stack_.back().column_path_); + if (it != column_path_value_map_.end()) { write_null_value(it->second); } @@ -636,16 +636,16 @@ class basic_csv_encoder final : public basic_json_visitor } case stack_item_kind::column: { - if (column_pointers_.size() <= row_counts_.back()) + if (column_paths_.size() <= row_counts_.back()) { - column_pointers_.emplace_back(); + column_paths_.emplace_back(); } - write_null_value(column_pointers_[row_counts_.back()]); + write_null_value(column_paths_[row_counts_.back()]); break; } case stack_item_kind::column_multivalued_field: { - write_null_value(column_pointers_[row_counts_.back()]); + write_null_value(column_paths_[row_counts_.back()]); break; } default: @@ -666,15 +666,15 @@ class basic_csv_encoder final : public basic_json_visitor { if (options_.column_names().empty()) { - column_names_.emplace_back(stack_.back().pointer_); - column_pointers_.emplace_back(stack_.back().pointer_); + column_names_.emplace_back(stack_.back().column_path_); + column_paths_.emplace_back(stack_.back().column_path_); } - column_pointer_value_map_[stack_.back().pointer_] = std::basic_string(); + column_path_value_map_[stack_.back().column_path_] = std::basic_string(); } - auto it = column_pointer_value_map_.find(stack_.back().pointer_); - if (it != column_pointer_value_map_.end()) + auto it = column_path_value_map_.find(stack_.back().column_path_); + if (it != column_path_value_map_.end()) { - write_string_value(sv, column_pointer_value_map_[stack_.back().pointer_]); + write_string_value(sv, column_path_value_map_[stack_.back().column_path_]); } break; } @@ -684,10 +684,10 @@ class basic_csv_encoder final : public basic_json_visitor append_array_path_component(); if (stack_[0].count_ == 0) { - column_pointer_value_map_[stack_.back().pointer_] = std::basic_string(); + column_path_value_map_[stack_.back().column_path_] = std::basic_string(); } - auto it = column_pointer_value_map_.find(stack_.back().pointer_); - if (it != column_pointer_value_map_.end()) + auto it = column_path_value_map_.find(stack_.back().column_path_); + if (it != column_path_value_map_.end()) { write_string_value(sv, it->second); } @@ -704,16 +704,16 @@ class basic_csv_encoder final : public basic_json_visitor } case stack_item_kind::column: { - if (column_pointers_.size() <= row_counts_.back()) + if (column_paths_.size() <= row_counts_.back()) { - column_pointers_.emplace_back(); + column_paths_.emplace_back(); } - write_string_value(sv, column_pointers_[row_counts_.back()]); + write_string_value(sv, column_paths_[row_counts_.back()]); break; } case stack_item_kind::column_multivalued_field: { - write_string_value(sv, column_pointers_[row_counts_.back()]); + write_string_value(sv, column_paths_[row_counts_.back()]); break; } default: @@ -793,12 +793,12 @@ class basic_csv_encoder final : public basic_json_visitor if (options_.column_names().empty()) { column_names_.emplace_back(stack_.back().pathname_); - column_pointers_.emplace_back(stack_.back().pointer_); + column_paths_.emplace_back(stack_.back().column_path_); } - column_pointer_value_map_[stack_.back().pointer_] = std::basic_string(); + column_path_value_map_[stack_.back().column_path_] = std::basic_string(); } - auto it = column_pointer_value_map_.find(stack_.back().pointer_); - if (it != column_pointer_value_map_.end()) + auto it = column_path_value_map_.find(stack_.back().column_path_); + if (it != column_path_value_map_.end()) { if (!it->second.empty() && options_.subfield_delimiter() != char_type()) { @@ -814,10 +814,10 @@ class basic_csv_encoder final : public basic_json_visitor append_array_path_component(); if (stack_[0].count_ == 0) { - column_pointer_value_map_[stack_.back().pointer_] = std::basic_string(); + column_path_value_map_[stack_.back().column_path_] = std::basic_string(); } - auto it = column_pointer_value_map_.find(stack_.back().pointer_); - if (it != column_pointer_value_map_.end()) + auto it = column_path_value_map_.find(stack_.back().column_path_); + if (it != column_path_value_map_.end()) { write_double_value(val, context, it->second, ec); } @@ -834,16 +834,16 @@ class basic_csv_encoder final : public basic_json_visitor } case stack_item_kind::column: { - if (column_pointers_.size() <= row_counts_.back()) + if (column_paths_.size() <= row_counts_.back()) { - column_pointers_.emplace_back(); + column_paths_.emplace_back(); } - write_double_value(val, context, column_pointers_[row_counts_.back()], ec); + write_double_value(val, context, column_paths_[row_counts_.back()], ec); break; } case stack_item_kind::column_multivalued_field: { - write_double_value(val, context, column_pointers_[row_counts_.back()], ec); + write_double_value(val, context, column_paths_[row_counts_.back()], ec); break; } default: @@ -868,12 +868,12 @@ class basic_csv_encoder final : public basic_json_visitor if (options_.column_names().empty()) { column_names_.emplace_back(stack_.back().pathname_); - column_pointers_.emplace_back(stack_.back().pointer_); + column_paths_.emplace_back(stack_.back().column_path_); } - column_pointer_value_map_[stack_.back().pointer_] = std::basic_string(); + column_path_value_map_[stack_.back().column_path_] = std::basic_string(); } - auto it = column_pointer_value_map_.find(stack_.back().pointer_); - if (it != column_pointer_value_map_.end()) + auto it = column_path_value_map_.find(stack_.back().column_path_); + if (it != column_path_value_map_.end()) { if (!it->second.empty() && options_.subfield_delimiter() != char_type()) { @@ -889,10 +889,10 @@ class basic_csv_encoder final : public basic_json_visitor append_array_path_component(); if (stack_[0].count_ == 0) { - column_pointer_value_map_[stack_.back().pointer_] = std::basic_string(); + column_path_value_map_[stack_.back().column_path_] = std::basic_string(); } - auto it = column_pointer_value_map_.find(stack_.back().pointer_); - if (it != column_pointer_value_map_.end()) + auto it = column_path_value_map_.find(stack_.back().column_path_); + if (it != column_path_value_map_.end()) { write_int64_value(val, it->second); } @@ -909,16 +909,16 @@ class basic_csv_encoder final : public basic_json_visitor } case stack_item_kind::column: { - if (column_pointers_.size() <= row_counts_.back()) + if (column_paths_.size() <= row_counts_.back()) { - column_pointers_.emplace_back(); + column_paths_.emplace_back(); } - write_int64_value(val, column_pointers_[row_counts_.back()]); + write_int64_value(val, column_paths_[row_counts_.back()]); break; } case stack_item_kind::column_multivalued_field: { - write_int64_value(val, column_pointers_[row_counts_.back()]); + write_int64_value(val, column_paths_[row_counts_.back()]); break; } default: @@ -943,12 +943,12 @@ class basic_csv_encoder final : public basic_json_visitor if (options_.column_names().empty()) { column_names_.emplace_back(stack_.back().pathname_); - column_pointers_.emplace_back(stack_.back().pointer_); + column_paths_.emplace_back(stack_.back().column_path_); } - column_pointer_value_map_[stack_.back().pointer_] = std::basic_string(); + column_path_value_map_[stack_.back().column_path_] = std::basic_string(); } - auto it = column_pointer_value_map_.find(stack_.back().pointer_); - if (it != column_pointer_value_map_.end()) + auto it = column_path_value_map_.find(stack_.back().column_path_); + if (it != column_path_value_map_.end()) { if (!it->second.empty() && options_.subfield_delimiter() != char_type()) { @@ -964,10 +964,10 @@ class basic_csv_encoder final : public basic_json_visitor append_array_path_component(); if (stack_[0].count_ == 0) { - column_pointer_value_map_[stack_.back().pointer_] = std::basic_string(); + column_path_value_map_[stack_.back().column_path_] = std::basic_string(); } - auto it = column_pointer_value_map_.find(stack_.back().pointer_); - if (it != column_pointer_value_map_.end()) + auto it = column_path_value_map_.find(stack_.back().column_path_); + if (it != column_path_value_map_.end()) { write_uint64_value(val, it->second); } @@ -984,16 +984,16 @@ class basic_csv_encoder final : public basic_json_visitor } case stack_item_kind::column: { - if (column_pointers_.size() <= row_counts_.back()) + if (column_paths_.size() <= row_counts_.back()) { - column_pointers_.emplace_back(); + column_paths_.emplace_back(); } - write_uint64_value(val, column_pointers_[row_counts_.back()]); + write_uint64_value(val, column_paths_[row_counts_.back()]); break; } case stack_item_kind::column_multivalued_field: { - write_uint64_value(val, column_pointers_[row_counts_.back()]); + write_uint64_value(val, column_paths_[row_counts_.back()]); break; } default: @@ -1015,12 +1015,12 @@ class basic_csv_encoder final : public basic_json_visitor if (options_.column_names().empty()) { column_names_.emplace_back(stack_.back().pathname_); - column_pointers_.emplace_back(stack_.back().pointer_); + column_paths_.emplace_back(stack_.back().column_path_); } - column_pointer_value_map_[stack_.back().pointer_] = std::basic_string(); + column_path_value_map_[stack_.back().column_path_] = std::basic_string(); } - auto it = column_pointer_value_map_.find(stack_.back().pointer_); - if (it != column_pointer_value_map_.end()) + auto it = column_path_value_map_.find(stack_.back().column_path_); + if (it != column_path_value_map_.end()) { if (!it->second.empty() && options_.subfield_delimiter() != char_type()) { @@ -1036,10 +1036,10 @@ class basic_csv_encoder final : public basic_json_visitor append_array_path_component(); if (stack_[0].count_ == 0) { - column_pointer_value_map_[stack_.back().pointer_] = std::basic_string(); + column_path_value_map_[stack_.back().column_path_] = std::basic_string(); } - auto it = column_pointer_value_map_.find(stack_.back().pointer_); - if (it != column_pointer_value_map_.end()) + auto it = column_path_value_map_.find(stack_.back().column_path_); + if (it != column_path_value_map_.end()) { write_bool_value(val, it->second); } @@ -1056,16 +1056,16 @@ class basic_csv_encoder final : public basic_json_visitor } case stack_item_kind::column: { - if (column_pointers_.size() <= row_counts_.back()) + if (column_paths_.size() <= row_counts_.back()) { - column_pointers_.emplace_back(); + column_paths_.emplace_back(); } - write_bool_value(val, column_pointers_[row_counts_.back()]); + write_bool_value(val, column_paths_[row_counts_.back()]); break; } case stack_item_kind::column_multivalued_field: { - write_bool_value(val, column_pointers_[row_counts_.back()]); + write_bool_value(val, column_paths_[row_counts_.back()]); break; } default: diff --git a/include/jsoncons_ext/csv/csv_options.hpp b/include/jsoncons_ext/csv/csv_options.hpp index 8f1b174069..c977bfadbb 100644 --- a/include/jsoncons_ext/csv/csv_options.hpp +++ b/include/jsoncons_ext/csv/csv_options.hpp @@ -293,6 +293,7 @@ class basic_csv_options_common string_type inf_to_str_; string_type neginf_to_str_; string_type column_names_; + std::vector> column_mapping_; protected: basic_csv_options_common() @@ -341,11 +342,16 @@ class basic_csv_options_common return quote_escape_char_; } - string_type column_names() const + const string_type& column_names() const { return column_names_; } + const std::vector>& column_mapping() const + { + return column_mapping_; + } + bool enable_nan_to_num() const { return enable_nan_to_num_; @@ -686,6 +692,7 @@ class basic_csv_options final : public basic_csv_decode_options, public b using basic_csv_decode_options::quote_char; using basic_csv_decode_options::quote_escape_char; using basic_csv_decode_options::column_names; + using basic_csv_decode_options::column_mapping; using basic_csv_decode_options::header_lines; using basic_csv_decode_options::assume_header; using basic_csv_decode_options::ignore_empty_values; @@ -805,6 +812,12 @@ class basic_csv_options final : public basic_csv_decode_options, public b return *this; } + basic_csv_options& column_mapping(const std::vector& value) + { + this->column_mapping_ = value; + return *this; + } + basic_csv_options& column_types(const string_type& value) { this->column_types_ = value; From 7ff3f0f3ac7d2d921d15be8a0530d28cf825dc81 Mon Sep 17 00:00:00 2001 From: Daniel Parker Date: Sun, 19 Jan 2025 17:30:12 -0500 Subject: [PATCH 47/79] csv_options column_mapping --- include/jsoncons_ext/csv/csv_options.hpp | 2 +- test/csv/src/csv_encoder_tests.cpp | 70 ++++++++++++++++++++++-- 2 files changed, 67 insertions(+), 5 deletions(-) diff --git a/include/jsoncons_ext/csv/csv_options.hpp b/include/jsoncons_ext/csv/csv_options.hpp index c977bfadbb..09cd0e40db 100644 --- a/include/jsoncons_ext/csv/csv_options.hpp +++ b/include/jsoncons_ext/csv/csv_options.hpp @@ -812,7 +812,7 @@ class basic_csv_options final : public basic_csv_decode_options, public b return *this; } - basic_csv_options& column_mapping(const std::vector& value) + basic_csv_options& column_mapping(const std::vector>& value) { this->column_mapping_ = value; return *this; diff --git a/test/csv/src/csv_encoder_tests.cpp b/test/csv/src/csv_encoder_tests.cpp index ccf8fac2e7..28f36d3f4e 100644 --- a/test/csv/src/csv_encoder_tests.cpp +++ b/test/csv/src/csv_encoder_tests.cpp @@ -9,7 +9,7 @@ namespace csv = jsoncons::csv; TEST_CASE("test json to flat csv") { -//#if 0 +#if 0 SECTION("array of objects to csv") { std::string expected = R"(boolean,datetime,float,text @@ -228,7 +228,7 @@ NY,LON,TOR;LON //std::cout << buf << "\n"; CHECK(expected == buf); } -//#endif +#endif SECTION("array of subarrays to csv") { @@ -258,9 +258,71 @@ NY,LON,TOR;LON } -TEST_CASE("test json to non-flat csv") +TEST_CASE("test json to non-flat csv with column mappings") { //#if 0 + SECTION("array of objects to csv") + { + std::string expected = R"(Number,Date Time +1.0,1971-01-01T04:14:00 +1.27,1948-01-01T14:57:13 +)"; + + std::string jtext = R"( +[ + { + "text": "Chicago Reader", + "float": 1.0, + "datetime": "1971-01-01T04:14:00", + "boolean": true, + "nested": { + "time": "04:14:00", + "nested": { + "date": "1971-01-01", + "integer": 40 + } + } + }, + { + "text": "Chicago Sun-Times", + "float": 1.27, + "datetime": "1948-01-01T14:57:13", + "boolean": true, + "nested": { + "time": "14:57:13", + "nested": { + "date": "1948-01-01", + "integer": 63 + } + } + } +] + )"; + + + auto j = jsoncons::json::parse(jtext); + //std::cout << pretty_print(j) << "\n"; + + auto options = csv::csv_options{} + .flat(false) + .column_mapping({ + {"/float","Number"}, + {"/datetime","Date Time"} + }); + + std::string buf; + csv::csv_string_encoder encoder(buf, options); + j.dump(encoder); + + //std::cout << buf << "\n"; + + CHECK(expected == buf); + } +} + +TEST_CASE("test json to non-flat csv") +{ +#if 0 SECTION("array of objects to csv") { std::string expected = R"(/boolean,/datetime,/float,/nested/nested/date,/nested/nested/integer,/nested/time,/text @@ -451,6 +513,6 @@ NY,LON,TOR;LON CHECK(expected == buf); } -//#endif +#endif } From 43995f51018f38f446867f6ae85c983f61c299c2 Mon Sep 17 00:00:00 2001 From: Daniel Parker Date: Sun, 19 Jan 2025 18:49:47 -0500 Subject: [PATCH 48/79] csv_options column_mapping --- include/jsoncons_ext/csv/csv_encoder.hpp | 61 ++++++++++++++---------- 1 file changed, 37 insertions(+), 24 deletions(-) diff --git a/include/jsoncons_ext/csv/csv_encoder.hpp b/include/jsoncons_ext/csv/csv_encoder.hpp index 3af223d37d..c8f878feb7 100644 --- a/include/jsoncons_ext/csv/csv_encoder.hpp +++ b/include/jsoncons_ext/csv/csv_encoder.hpp @@ -131,7 +131,18 @@ class basic_csv_encoder final : public basic_json_visitor buffer_(alloc), value_buffer_(alloc) { - jsoncons::csv::detail::parse_column_names(options.column_names(), column_names_); + if (!options.column_mapping().empty()) + { + for (const auto& item : options.column_mapping()) + { + column_paths_.emplace_back(item.first); + column_path_name_map_.emplace(item.first, item.second); + } + } + else + { + jsoncons::csv::detail::parse_column_names(options.column_names(), column_names_); + } } ~basic_csv_encoder() noexcept @@ -378,11 +389,11 @@ class basic_csv_encoder final : public basic_json_visitor { if (stack_[0].count_ == 0) { - if (options_.column_names().empty()) + if (options_.column_mapping().empty()) { column_paths_.emplace_back(stack_.back().column_path_); + column_path_value_map_[stack_.back().column_path_] = std::basic_string(); } - column_path_value_map_[stack_.back().column_path_] = std::basic_string(); } value_buffer_.clear(); stack_.emplace_back(stack_item_kind::multivalued_field); @@ -468,13 +479,19 @@ class basic_csv_encoder final : public basic_json_visitor { if (stack_[0].count_ == 0) { - for (std::size_t i = 0; i < column_names_.size(); ++i) + std::size_t col = 0; + for (std::size_t i = 0; i < column_paths_.size(); ++i) { - if (i > 0) + auto it = column_path_name_map_.find(column_paths_[i]); + if (it != column_path_name_map_.end()) { - sink_.push_back(options_.field_delimiter()); + if (col > 0) + { + sink_.push_back(options_.field_delimiter()); + } + sink_.append(it->second.data(), it->second.length()); + ++col; } - sink_.append(column_names_[i].data(), column_names_[i].length()); } sink_.append(options_.line_delimiter().data(), options_.line_delimiter().length()); @@ -571,13 +588,15 @@ class basic_csv_encoder final : public basic_json_visitor stack_.back().column_path_ = stack_[stack_.size()-2].column_path_; stack_.back().column_path_.push_back('/'); stack_.back().column_path_.append(buffer_); - if (stack_[0].count_ == 0 && options_.column_names().empty()) + if (stack_[0].count_ == 0) { - column_names_.emplace_back(stack_.back().column_path_); - column_paths_.emplace_back(stack_.back().column_path_); - if (stack_.back().item_kind_ == stack_item_kind::row) + if (options_.column_mapping().empty()) { - column_path_name_map_.emplace(stack_.back().column_path_, stack_.back().column_path_); + column_paths_.emplace_back(stack_.back().column_path_); + if (stack_.back().item_kind_ == stack_item_kind::row) + { + column_path_name_map_.emplace(stack_.back().column_path_, stack_.back().column_path_); + } } } } @@ -592,9 +611,8 @@ class basic_csv_encoder final : public basic_json_visitor { if (stack_[0].count_ == 0) { - if (options_.column_names().empty()) + if (options_.column_mapping().empty()) { - column_names_.emplace_back(stack_.back().pathname_); column_paths_.emplace_back(stack_.back().column_path_); } column_path_value_map_[stack_.back().column_path_] = std::basic_string(); @@ -664,9 +682,8 @@ class basic_csv_encoder final : public basic_json_visitor { if (stack_[0].count_ == 0) { - if (options_.column_names().empty()) + if (options_.column_mapping().empty()) { - column_names_.emplace_back(stack_.back().column_path_); column_paths_.emplace_back(stack_.back().column_path_); } column_path_value_map_[stack_.back().column_path_] = std::basic_string(); @@ -790,9 +807,8 @@ class basic_csv_encoder final : public basic_json_visitor { if (stack_[0].count_ == 0) { - if (options_.column_names().empty()) + if (options_.column_mapping().empty()) { - column_names_.emplace_back(stack_.back().pathname_); column_paths_.emplace_back(stack_.back().column_path_); } column_path_value_map_[stack_.back().column_path_] = std::basic_string(); @@ -865,9 +881,8 @@ class basic_csv_encoder final : public basic_json_visitor { if (stack_[0].count_ == 0) { - if (options_.column_names().empty()) + if (options_.column_mapping().empty()) { - column_names_.emplace_back(stack_.back().pathname_); column_paths_.emplace_back(stack_.back().column_path_); } column_path_value_map_[stack_.back().column_path_] = std::basic_string(); @@ -940,9 +955,8 @@ class basic_csv_encoder final : public basic_json_visitor { if (stack_[0].count_ == 0) { - if (options_.column_names().empty()) + if (options_.column_mapping().empty()) { - column_names_.emplace_back(stack_.back().pathname_); column_paths_.emplace_back(stack_.back().column_path_); } column_path_value_map_[stack_.back().column_path_] = std::basic_string(); @@ -1012,9 +1026,8 @@ class basic_csv_encoder final : public basic_json_visitor { if (stack_[0].count_ == 0) { - if (options_.column_names().empty()) + if (options_.column_mapping().empty()) { - column_names_.emplace_back(stack_.back().pathname_); column_paths_.emplace_back(stack_.back().column_path_); } column_path_value_map_[stack_.back().column_path_] = std::basic_string(); From 997092ae1a8db3d3ebb3f41724b6c4480c4411fc Mon Sep 17 00:00:00 2001 From: Daniel Parker Date: Sun, 19 Jan 2025 18:53:21 -0500 Subject: [PATCH 49/79] csv_options column_mapping --- test/csv/src/csv_encoder_tests.cpp | 62 ++++++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) diff --git a/test/csv/src/csv_encoder_tests.cpp b/test/csv/src/csv_encoder_tests.cpp index 28f36d3f4e..358cb21778 100644 --- a/test/csv/src/csv_encoder_tests.cpp +++ b/test/csv/src/csv_encoder_tests.cpp @@ -7,6 +7,68 @@ namespace csv = jsoncons::csv; +TEST_CASE("test json to flat csv with column mappings") +{ +//#if 0 + SECTION("array of objects to csv") + { + std::string expected = R"(Number,Date Time +1.0,1971-01-01T04:14:00 +1.27,1948-01-01T14:57:13 +)"; + + std::string jtext = R"( +[ + { + "text": "Chicago Reader", + "float": 1.0, + "datetime": "1971-01-01T04:14:00", + "boolean": true, + "nested": { + "time": "04:14:00", + "nested": { + "date": "1971-01-01", + "integer": 40 + } + } + }, + { + "text": "Chicago Sun-Times", + "float": 1.27, + "datetime": "1948-01-01T14:57:13", + "boolean": true, + "nested": { + "time": "14:57:13", + "nested": { + "date": "1948-01-01", + "integer": 63 + } + } + } +] + )"; + + + auto j = jsoncons::json::parse(jtext); + //std::cout << pretty_print(j) << "\n"; + + auto options = csv::csv_options{} + .flat(true) + .column_mapping({ + {"/float","Number"}, + {"/datetime","Date Time"} + }); + + std::string buf; + csv::csv_string_encoder encoder(buf, options); + j.dump(encoder); + + //std::cout << buf << "\n"; + + CHECK(expected == buf); + } +} + TEST_CASE("test json to flat csv") { #if 0 From be31af33dcb2ef1db10e0e536d9bf970a68af67d Mon Sep 17 00:00:00 2001 From: Daniel Parker Date: Sun, 19 Jan 2025 19:44:52 -0500 Subject: [PATCH 50/79] csv_options column_mapping --- test/csv/src/csv_encoder_tests.cpp | 55 ++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) diff --git a/test/csv/src/csv_encoder_tests.cpp b/test/csv/src/csv_encoder_tests.cpp index 358cb21778..51cac6cbce 100644 --- a/test/csv/src/csv_encoder_tests.cpp +++ b/test/csv/src/csv_encoder_tests.cpp @@ -67,6 +67,61 @@ TEST_CASE("test json to flat csv with column mappings") CHECK(expected == buf); } + + SECTION("array of arrays to csv") + { + std::string expected = R"(Date Time,Newspaper +1971-01-01T04:14:00,Chicago Reader +1948-01-01T14:57:13,Chicago Sun-Times +)"; + + std::string jtext = R"( +[ + [ + "Chicago Reader", + 1.0, + "1971-01-01T04:14:00", + true, + [ + "04:14:00", + [ + "1971-01-01", + 40 + ] + ] + ], + [ + "Chicago Sun-Times", + 1.27, + "1948-01-01T14:57:13", + true, + [ + "14:57:13", + [ + "1948-01-01", + 63 + ] + ] + ] +] + )"; + + auto j = jsoncons::json::parse(jtext); + //std::cout << pretty_print(j) << "\n"; + + auto options = csv::csv_options{} + .flat(true) + .column_mapping({ + {"/2","Date Time"}, + {"/0","Newspaper"} + }); + + std::string buf; + csv::csv_string_encoder encoder(buf, options); + j.dump(encoder); + + CHECK(expected == buf); + } } TEST_CASE("test json to flat csv") From 264caad476f4ed9fb79f3e348f2e6b9b7a3ae712 Mon Sep 17 00:00:00 2001 From: Daniel Parker Date: Sun, 19 Jan 2025 21:34:39 -0500 Subject: [PATCH 51/79] csv_options column_mapping --- test/csv/src/csv_encoder_tests.cpp | 64 ++++++++++++++++++++++++++++-- 1 file changed, 60 insertions(+), 4 deletions(-) diff --git a/test/csv/src/csv_encoder_tests.cpp b/test/csv/src/csv_encoder_tests.cpp index 51cac6cbce..7300b903f2 100644 --- a/test/csv/src/csv_encoder_tests.cpp +++ b/test/csv/src/csv_encoder_tests.cpp @@ -9,7 +9,7 @@ namespace csv = jsoncons::csv; TEST_CASE("test json to flat csv with column mappings") { -//#if 0 +#if 0 SECTION("array of objects to csv") { std::string expected = R"(Number,Date Time @@ -122,6 +122,7 @@ TEST_CASE("test json to flat csv with column mappings") CHECK(expected == buf); } +#endif } TEST_CASE("test json to flat csv") @@ -345,7 +346,6 @@ NY,LON,TOR;LON //std::cout << buf << "\n"; CHECK(expected == buf); } -#endif SECTION("array of subarrays to csv") { @@ -372,12 +372,12 @@ NY,LON,TOR;LON //std::cout << buf << "\n"; CHECK(expected == buf); } - +#endif } TEST_CASE("test json to non-flat csv with column mappings") { -//#if 0 +#if 0 SECTION("array of objects to csv") { std::string expected = R"(Number,Date Time @@ -435,6 +435,62 @@ TEST_CASE("test json to non-flat csv with column mappings") CHECK(expected == buf); } +#endif + SECTION("array of arrays to csv") + { + std::string expected = R"(Date Time,Newspaper,No Pages +1971-01-01T04:14:00,Chicago Reader,40 +1948-01-01T14:57:13,Chicago Sun-Times,63 +)"; + + std::string jtext = R"( +[ + [ + "Chicago Reader", + 1.0, + "1971-01-01T04:14:00", + false, + [ + "04:14:00", + [ + "1971-01-01", + 40 + ] + ] + ], + [ + "Chicago Sun-Times", + 1.27, + "1948-01-01T14:57:13", + true, + [ + "14:57:13", + [ + "1948-01-01", + 63 + ] + ] + ] +] + )"; + + auto j = jsoncons::json::parse(jtext); + //std::cout << pretty_print(j) << "\n"; + + auto options = csv::csv_options{} + .flat(false) + .column_mapping({ + {"/2","Date Time"}, + {"/0","Newspaper"}, + {"/3/0/1", "No Pages"} + }); + + std::string buf; + csv::csv_string_encoder encoder(buf, options); + j.dump(encoder); + + CHECK(expected == buf); + } } TEST_CASE("test json to non-flat csv") From 60795de07406819d53fb246984146fb7e4554a80 Mon Sep 17 00:00:00 2001 From: Daniel Parker Date: Sun, 19 Jan 2025 22:03:25 -0500 Subject: [PATCH 52/79] csv_options column_mapping --- include/jsoncons_ext/csv/csv_encoder.hpp | 85 ++++++++++++------------ 1 file changed, 43 insertions(+), 42 deletions(-) diff --git a/include/jsoncons_ext/csv/csv_encoder.hpp b/include/jsoncons_ext/csv/csv_encoder.hpp index c8f878feb7..7841eb116a 100644 --- a/include/jsoncons_ext/csv/csv_encoder.hpp +++ b/include/jsoncons_ext/csv/csv_encoder.hpp @@ -104,6 +104,7 @@ class basic_csv_encoder final : public basic_json_visitor std::vector column_names_; std::vector column_paths_; + std::vector column_rows_; std::unordered_map,std::equal_to,string_string_allocator_type> column_path_value_map_; std::unordered_map,std::equal_to,string_string_allocator_type> column_path_name_map_; @@ -343,24 +344,6 @@ class basic_csv_encoder final : public basic_json_visitor case stack_item_kind::object: stack_.emplace_back(stack_item_kind::object); break; - case stack_item_kind::column_mapping: - stack_.emplace_back(stack_item_kind::column); - row_counts_.push_back(1); - if (column_paths_.size() <= row_counts_.back()) - { - column_paths_.emplace_back(); - } - break; - case stack_item_kind::column: - { - if (column_paths_.size() <= row_counts_.back()) - { - column_paths_.emplace_back(); - } - begin_value(column_paths_[row_counts_.back()]); - stack_.emplace_back(stack_item_kind::column_multivalued_field); - break; - } case stack_item_kind::row: stack_.emplace_back(stack_item_kind::row); break; @@ -402,6 +385,24 @@ class basic_csv_encoder final : public basic_json_visitor case stack_item_kind::multivalued_field: stack_.emplace_back(stack_item_kind::unmapped); break; + case stack_item_kind::column_mapping: + stack_.emplace_back(stack_item_kind::column); + row_counts_.push_back(1); + if (column_rows_.size() <= row_counts_.back()) + { + column_rows_.emplace_back(); + } + break; + case stack_item_kind::column: + { + if (column_rows_.size() <= row_counts_.back()) + { + column_rows_.emplace_back(); + } + begin_value(column_rows_[row_counts_.back()]); + stack_.emplace_back(stack_item_kind::column_multivalued_field); + break; + } case stack_item_kind::column_multivalued_field: break; case stack_item_kind::unmapped: @@ -654,16 +655,16 @@ class basic_csv_encoder final : public basic_json_visitor } case stack_item_kind::column: { - if (column_paths_.size() <= row_counts_.back()) + if (column_rows_.size() <= row_counts_.back()) { - column_paths_.emplace_back(); + column_rows_.emplace_back(); } - write_null_value(column_paths_[row_counts_.back()]); + write_null_value(column_rows_[row_counts_.back()]); break; } case stack_item_kind::column_multivalued_field: { - write_null_value(column_paths_[row_counts_.back()]); + write_null_value(column_rows_[row_counts_.back()]); break; } default: @@ -721,16 +722,16 @@ class basic_csv_encoder final : public basic_json_visitor } case stack_item_kind::column: { - if (column_paths_.size() <= row_counts_.back()) + if (column_rows_.size() <= row_counts_.back()) { - column_paths_.emplace_back(); + column_rows_.emplace_back(); } - write_string_value(sv, column_paths_[row_counts_.back()]); + write_string_value(sv, column_rows_[row_counts_.back()]); break; } case stack_item_kind::column_multivalued_field: { - write_string_value(sv, column_paths_[row_counts_.back()]); + write_string_value(sv, column_rows_[row_counts_.back()]); break; } default: @@ -850,16 +851,16 @@ class basic_csv_encoder final : public basic_json_visitor } case stack_item_kind::column: { - if (column_paths_.size() <= row_counts_.back()) + if (column_rows_.size() <= row_counts_.back()) { - column_paths_.emplace_back(); + column_rows_.emplace_back(); } - write_double_value(val, context, column_paths_[row_counts_.back()], ec); + write_double_value(val, context, column_rows_[row_counts_.back()], ec); break; } case stack_item_kind::column_multivalued_field: { - write_double_value(val, context, column_paths_[row_counts_.back()], ec); + write_double_value(val, context, column_rows_[row_counts_.back()], ec); break; } default: @@ -924,16 +925,16 @@ class basic_csv_encoder final : public basic_json_visitor } case stack_item_kind::column: { - if (column_paths_.size() <= row_counts_.back()) + if (column_rows_.size() <= row_counts_.back()) { - column_paths_.emplace_back(); + column_rows_.emplace_back(); } - write_int64_value(val, column_paths_[row_counts_.back()]); + write_int64_value(val, column_rows_[row_counts_.back()]); break; } case stack_item_kind::column_multivalued_field: { - write_int64_value(val, column_paths_[row_counts_.back()]); + write_int64_value(val, column_rows_[row_counts_.back()]); break; } default: @@ -998,16 +999,16 @@ class basic_csv_encoder final : public basic_json_visitor } case stack_item_kind::column: { - if (column_paths_.size() <= row_counts_.back()) + if (column_rows_.size() <= row_counts_.back()) { - column_paths_.emplace_back(); + column_rows_.emplace_back(); } - write_uint64_value(val, column_paths_[row_counts_.back()]); + write_uint64_value(val, column_rows_[row_counts_.back()]); break; } case stack_item_kind::column_multivalued_field: { - write_uint64_value(val, column_paths_[row_counts_.back()]); + write_uint64_value(val, column_rows_[row_counts_.back()]); break; } default: @@ -1069,16 +1070,16 @@ class basic_csv_encoder final : public basic_json_visitor } case stack_item_kind::column: { - if (column_paths_.size() <= row_counts_.back()) + if (column_rows_.size() <= row_counts_.back()) { - column_paths_.emplace_back(); + column_rows_.emplace_back(); } - write_bool_value(val, column_paths_[row_counts_.back()]); + write_bool_value(val, column_rows_[row_counts_.back()]); break; } case stack_item_kind::column_multivalued_field: { - write_bool_value(val, column_paths_[row_counts_.back()]); + write_bool_value(val, column_rows_[row_counts_.back()]); break; } default: From 2498fcb6ead7f37f8af64a6a75741df66da110d8 Mon Sep 17 00:00:00 2001 From: Daniel Parker Date: Mon, 20 Jan 2025 12:38:17 -0500 Subject: [PATCH 53/79] csv_options column_mapping --- include/jsoncons_ext/csv/csv_encoder.hpp | 13 ++++++- test/csv/src/csv_encoder_tests.cpp | 48 ++++++++++++------------ 2 files changed, 36 insertions(+), 25 deletions(-) diff --git a/include/jsoncons_ext/csv/csv_encoder.hpp b/include/jsoncons_ext/csv/csv_encoder.hpp index 7841eb116a..82c08cb417 100644 --- a/include/jsoncons_ext/csv/csv_encoder.hpp +++ b/include/jsoncons_ext/csv/csv_encoder.hpp @@ -140,7 +140,7 @@ class basic_csv_encoder final : public basic_json_visitor column_path_name_map_.emplace(item.first, item.second); } } - else + else if (!options.column_names().empty()) { jsoncons::csv::detail::parse_column_names(options.column_names(), column_names_); } @@ -294,7 +294,18 @@ class basic_csv_encoder final : public basic_json_visitor break; case stack_item_kind::column_mapping: { + bool first = true; for (const auto& item : column_paths_) + { + if (!first) + { + sink_.push_back(options_.field_delimiter()); + first = false; + } + sink_.append(item.data(), item.size()); + } + //sink_.append(options_.line_delimiter().data(), options_.line_delimiter().length()); + for (const auto& item : column_rows_) { sink_.append(item.data(), item.size()); sink_.append(options_.line_delimiter().data(), options_.line_delimiter().length()); diff --git a/test/csv/src/csv_encoder_tests.cpp b/test/csv/src/csv_encoder_tests.cpp index 7300b903f2..ba8d63ccb3 100644 --- a/test/csv/src/csv_encoder_tests.cpp +++ b/test/csv/src/csv_encoder_tests.cpp @@ -316,21 +316,17 @@ NY,LON,TOR;LON CHECK(expected == buf); } - - SECTION("object of arrays and subarrays to csv") + + SECTION("array of subarrays to csv") { - std::string expected = R"(a,b,c -1;true;null,7;8;9,15 --4;5.5;6,10;11;12,16 -,,17 + std::string expected = R"(1;2;3,4;5;6 +7;8;9,10;11;12 )"; - const std::string jtext = R"( -{ - "a" : [[1,true,null],[-4,5.5,"6"]], - "b" : [[7,8,9],[10,11,12]], - "c" : [15,16,17] -} +[ + [[1,2,3],[4,5,6]], + [[7,8,9],[10,11,12]] +] )"; auto j = jsoncons::json::parse(jtext); @@ -345,18 +341,23 @@ NY,LON,TOR;LON //std::cout << buf << "\n"; CHECK(expected == buf); - } - - SECTION("array of subarrays to csv") + } +#endif + + SECTION("object of arrays and subarrays to csv") { - std::string expected = R"(1;2;3,4;5;6 -7;8;9,10;11;12 + std::string expected = R"(a,b,c +1;true;null,7;8;9,15 +-4;5.5;6,10;11;12,16 +,,17 )"; + const std::string jtext = R"( -[ - [[1,2,3],[4,5,6]], - [[7,8,9],[10,11,12]] -] +{ + "a" : [[1,true,null],[-4,5.5,"6"]], + "b" : [[7,8,9],[10,11,12]], + "c" : [15,16,17] +} )"; auto j = jsoncons::json::parse(jtext); @@ -371,8 +372,7 @@ NY,LON,TOR;LON //std::cout << buf << "\n"; CHECK(expected == buf); - } -#endif + } } TEST_CASE("test json to non-flat csv with column mappings") @@ -435,7 +435,6 @@ TEST_CASE("test json to non-flat csv with column mappings") CHECK(expected == buf); } -#endif SECTION("array of arrays to csv") { std::string expected = R"(Date Time,Newspaper,No Pages @@ -491,6 +490,7 @@ TEST_CASE("test json to non-flat csv with column mappings") CHECK(expected == buf); } +#endif } TEST_CASE("test json to non-flat csv") From a35c1a69821f4d4290723d9384437a2d47961c1d Mon Sep 17 00:00:00 2001 From: Daniel Parker Date: Tue, 21 Jan 2025 12:10:52 -0500 Subject: [PATCH 54/79] csv_options column_mapping --- include/jsoncons_ext/csv/csv_encoder.hpp | 264 ++++++++--------------- 1 file changed, 92 insertions(+), 172 deletions(-) diff --git a/include/jsoncons_ext/csv/csv_encoder.hpp b/include/jsoncons_ext/csv/csv_encoder.hpp index 82c08cb417..168e442cbf 100644 --- a/include/jsoncons_ext/csv/csv_encoder.hpp +++ b/include/jsoncons_ext/csv/csv_encoder.hpp @@ -39,7 +39,9 @@ class basic_csv_encoder final : public basic_json_visitor using string_type = std::basic_string, char_allocator_type>; using string_allocator_type = typename std::allocator_traits:: template rebind_alloc; using string_string_allocator_type = typename std::allocator_traits:: template rebind_alloc>; - + using string_vector_allocator_type = typename std::allocator_traits:: template rebind_alloc>>; + using column_type = std::vector; + using column_path_column_map_type = std::unordered_map,std::equal_to,string_vector_allocator_type>; private: static jsoncons::basic_string_view null_constant() { @@ -104,14 +106,14 @@ class basic_csv_encoder final : public basic_json_visitor std::vector column_names_; std::vector column_paths_; - std::vector column_rows_; std::unordered_map,std::equal_to,string_string_allocator_type> column_path_value_map_; std::unordered_map,std::equal_to,string_string_allocator_type> column_path_name_map_; + column_path_column_map_type column_path_column_map_; std::size_t column_index_{0}; - std::vector row_counts_; string_type buffer_; string_type value_buffer_; + column_path_column_map_type::iterator column_it_; // Noncopyable and nonmoveable basic_csv_encoder(const basic_csv_encoder&) = delete; @@ -130,7 +132,9 @@ class basic_csv_encoder final : public basic_json_visitor alloc_(alloc), fp_(options.float_format(), options.precision()), buffer_(alloc), - value_buffer_(alloc) + value_buffer_(alloc), + column_path_column_map_(alloc), + column_it_(column_path_column_map_.end()) { if (!options.column_mapping().empty()) { @@ -163,7 +167,6 @@ class basic_csv_encoder final : public basic_json_visitor column_names_.clear(); column_path_value_map_.clear(); column_index_ = 0; - row_counts_.clear(); } void reset(Sink&& sink) @@ -238,7 +241,7 @@ class basic_csv_encoder final : public basic_json_visitor stack_.emplace_back(stack_item_kind::unmapped); break; default: // error - std::cout << "visit_begin_object " << (int)stack_.back().item_kind_ << "\n"; + //std::cout << "visit_begin_object " << (int)stack_.back().item_kind_ << "\n"; ec = csv_errc::source_error; return false; } @@ -304,11 +307,57 @@ class basic_csv_encoder final : public basic_json_visitor } sink_.append(item.data(), item.size()); } - //sink_.append(options_.line_delimiter().data(), options_.line_delimiter().length()); - for (const auto& item : column_rows_) + sink_.append(options_.line_delimiter().data(), options_.line_delimiter().length()); + + std::vector> columns; + for (const auto& item : column_path_column_map_) { - sink_.append(item.data(), item.size()); - sink_.append(options_.line_delimiter().data(), options_.line_delimiter().length()); + columns.emplace_back(item.second.cbegin(), item.second.cend()); + } + + if (!columns.empty()) + { + const std::size_t no_cols = columns.size(); + bool done = false; + while (!done) + { + std::size_t missing_cols = 0; + + first = true; + for (auto& item : columns) + { + if (item.first == item.second) + { + ++missing_cols; + if (missing_cols == no_cols) + { + done = true; + break; + } + } + else + { + for (std::size_t i = 0; i < missing_cols; ++i) + { + sink_.push_back(options_.field_delimiter()); + } + if (!first) + { + sink_.push_back(options_.field_delimiter()); + } + else + { + first = false; + } + sink_.append((*(item.first)).data(), (*(item.first)).size()); + ++item.first; + } + } + if (!done) + { + sink_.append(options_.line_delimiter().data(), options_.line_delimiter().length()); + } + } } break; } @@ -317,14 +366,14 @@ class basic_csv_encoder final : public basic_json_visitor case stack_item_kind::unmapped: break; default: - std::cout << "visit_end_object " << (int)stack_.back().item_kind_ << "\n"; + //std::cout << "visit_end_object " << (int)stack_.back().item_kind_ << "\n"; ec = csv_errc::source_error; return false; } stack_.pop_back(); if (!stack_.empty()) { - end_value(); + ++stack_.back().count_; } return true; } @@ -398,29 +447,21 @@ class basic_csv_encoder final : public basic_json_visitor break; case stack_item_kind::column_mapping: stack_.emplace_back(stack_item_kind::column); - row_counts_.push_back(1); - if (column_rows_.size() <= row_counts_.back()) - { - column_rows_.emplace_back(); - } break; case stack_item_kind::column: { - if (column_rows_.size() <= row_counts_.back()) - { - column_rows_.emplace_back(); - } - begin_value(column_rows_[row_counts_.back()]); + value_buffer_.clear(); stack_.emplace_back(stack_item_kind::column_multivalued_field); break; } case stack_item_kind::column_multivalued_field: + stack_.emplace_back(stack_item_kind::unmapped); break; case stack_item_kind::unmapped: stack_.emplace_back(stack_item_kind::unmapped); break; default: // error - std::cout << "visit_begin_array " << (int)stack_.back().item_kind_ << "\n"; + //std::cout << "visit_begin_array " << (int)stack_.back().item_kind_ << "\n"; ec = csv_errc::source_error; return false; } @@ -529,11 +570,12 @@ class basic_csv_encoder final : public basic_json_visitor ++column_index_; break; case stack_item_kind::column_multivalued_field: + column_it_->second.emplace_back(value_buffer_.data(),value_buffer_.length()); break; case stack_item_kind::unmapped: break; default: - std::cout << "visit_end_array " << (int)stack_.back().item_kind_ << "\n"; + //std::cout << "visit_end_array " << (int)stack_.back().item_kind_ << "\n"; ec = csv_errc::source_error; return false; } @@ -541,7 +583,7 @@ class basic_csv_encoder final : public basic_json_visitor if (!stack_.empty()) { - end_value(); + ++stack_.back().count_; } return true; } @@ -584,6 +626,7 @@ class basic_csv_encoder final : public basic_json_visitor column_paths_[0].push_back(options_.field_delimiter()); column_paths_[0].append(string_type(name)); } + column_it_ = column_path_column_map_.emplace(string_type(name), column_type{}).first; break; } default: @@ -655,6 +698,7 @@ class basic_csv_encoder final : public basic_json_visitor } break; } + case stack_item_kind::column_multivalued_field: case stack_item_kind::multivalued_field: { if (!value_buffer_.empty()) @@ -666,16 +710,8 @@ class basic_csv_encoder final : public basic_json_visitor } case stack_item_kind::column: { - if (column_rows_.size() <= row_counts_.back()) - { - column_rows_.emplace_back(); - } - write_null_value(column_rows_[row_counts_.back()]); - break; - } - case stack_item_kind::column_multivalued_field: - { - write_null_value(column_rows_[row_counts_.back()]); + (*column_it_).second.emplace_back(); + write_null_value((*column_it_).second.back()); break; } default: @@ -722,6 +758,7 @@ class basic_csv_encoder final : public basic_json_visitor } break; } + case stack_item_kind::column_multivalued_field: case stack_item_kind::multivalued_field: { if (!value_buffer_.empty()) @@ -733,16 +770,8 @@ class basic_csv_encoder final : public basic_json_visitor } case stack_item_kind::column: { - if (column_rows_.size() <= row_counts_.back()) - { - column_rows_.emplace_back(); - } - write_string_value(sv, column_rows_[row_counts_.back()]); - break; - } - case stack_item_kind::column_multivalued_field: - { - write_string_value(sv, column_rows_[row_counts_.back()]); + (*column_it_).second.emplace_back(); + write_string_value(sv, (*column_it_).second.back()); break; } default: @@ -852,6 +881,7 @@ class basic_csv_encoder final : public basic_json_visitor break; } case stack_item_kind::multivalued_field: + case stack_item_kind::column_multivalued_field: { if (!value_buffer_.empty()) { @@ -862,16 +892,8 @@ class basic_csv_encoder final : public basic_json_visitor } case stack_item_kind::column: { - if (column_rows_.size() <= row_counts_.back()) - { - column_rows_.emplace_back(); - } - write_double_value(val, context, column_rows_[row_counts_.back()], ec); - break; - } - case stack_item_kind::column_multivalued_field: - { - write_double_value(val, context, column_rows_[row_counts_.back()], ec); + (*column_it_).second.emplace_back(); + write_double_value(val, context, (*column_it_).second.back(), ec); break; } default: @@ -925,6 +947,7 @@ class basic_csv_encoder final : public basic_json_visitor } break; } + case stack_item_kind::column_multivalued_field: case stack_item_kind::multivalued_field: { if (!value_buffer_.empty()) @@ -936,16 +959,8 @@ class basic_csv_encoder final : public basic_json_visitor } case stack_item_kind::column: { - if (column_rows_.size() <= row_counts_.back()) - { - column_rows_.emplace_back(); - } - write_int64_value(val, column_rows_[row_counts_.back()]); - break; - } - case stack_item_kind::column_multivalued_field: - { - write_int64_value(val, column_rows_[row_counts_.back()]); + (*column_it_).second.emplace_back(); + write_int64_value(val, (*column_it_).second.back()); break; } default: @@ -1000,6 +1015,7 @@ class basic_csv_encoder final : public basic_json_visitor break; } case stack_item_kind::multivalued_field: + case stack_item_kind::column_multivalued_field: { if (!value_buffer_.empty()) { @@ -1010,16 +1026,8 @@ class basic_csv_encoder final : public basic_json_visitor } case stack_item_kind::column: { - if (column_rows_.size() <= row_counts_.back()) - { - column_rows_.emplace_back(); - } - write_uint64_value(val, column_rows_[row_counts_.back()]); - break; - } - case stack_item_kind::column_multivalued_field: - { - write_uint64_value(val, column_rows_[row_counts_.back()]); + (*column_it_).second.emplace_back(); + write_uint64_value(val, (*column_it_).second.back()); break; } default: @@ -1071,6 +1079,7 @@ class basic_csv_encoder final : public basic_json_visitor break; } case stack_item_kind::multivalued_field: + case stack_item_kind::column_multivalued_field: { if (!value_buffer_.empty()) { @@ -1081,16 +1090,8 @@ class basic_csv_encoder final : public basic_json_visitor } case stack_item_kind::column: { - if (column_rows_.size() <= row_counts_.back()) - { - column_rows_.emplace_back(); - } - write_bool_value(val, column_rows_[row_counts_.back()]); - break; - } - case stack_item_kind::column_multivalued_field: - { - write_bool_value(val, column_rows_[row_counts_.back()]); + (*column_it_).second.emplace_back(); + write_bool_value(val, (*column_it_).second.back()); break; } default: @@ -1120,15 +1121,12 @@ class basic_csv_encoder final : public basic_json_visitor void write_string_value(const string_view_type& value, string_type& str) { - begin_value(str); do_string_value(value.data(),value.length(), str); - end_value(); + ++stack_.back().count_; } void write_double_value(double val, const ser_context& context, string_type& str, std::error_code& ec) { - begin_value(str); - if (!std::isfinite(val)) { if ((std::isnan)(val)) @@ -1182,32 +1180,26 @@ class basic_csv_encoder final : public basic_json_visitor fp_(val, str); } - end_value(); + ++stack_.back().count_; } void write_int64_value(int64_t val, string_type& str) { - begin_value(str); - jsoncons::detail::from_integer(val,str); - end_value(); + ++stack_.back().count_; } void write_uint64_value(uint64_t val, string_type& str) { - begin_value(str); - jsoncons::detail::from_integer(val,str); - end_value(); + ++stack_.back().count_; } void write_bool_value(bool val, string_type& str) { - begin_value(str); - if (val) { str.append(true_constant().data(), true_constant().size()); @@ -1217,87 +1209,15 @@ class basic_csv_encoder final : public basic_json_visitor str.append(false_constant().data(), false_constant().size()); } - end_value(); + ++stack_.back().count_; } bool write_null_value(string_type& str) { - begin_value(str); str.append(null_constant().data(), null_constant().size()); - end_value(); + ++stack_.back().count_; return true; } - - void begin_value(string_type& str) - { - if (stack_.empty()) - { - return; - } - switch (stack_.back().item_kind_) - { - case stack_item_kind::flat_row: - case stack_item_kind::row: - break; - case stack_item_kind::column: - { - if (row_counts_.size() >= 3) - { - for (std::size_t i = row_counts_.size()-2; i-- > 0;) - { - if (row_counts_[i] <= row_counts_.back()) - { - str.push_back(options_.field_delimiter()); - } - else - { - break; - } - } - } - if (column_index_ > 0) - { - str.push_back(options_.field_delimiter()); - } - break; - } - case stack_item_kind::multivalued_field: - break; - case stack_item_kind::column_multivalued_field: - if (stack_.back().count_ > 0 && options_.subfield_delimiter() != char_type()) - { - str.push_back(options_.subfield_delimiter()); - } - break; - default: - break; - } - } - - void end_value() - { - if (stack_.empty()) - { - return; - } - switch(stack_.back().item_kind_) - { - case stack_item_kind::flat_row: - case stack_item_kind::row: - { - ++stack_.back().count_; - break; - } - case stack_item_kind::column: - { - ++row_counts_.back(); - break; - } - default: - ++stack_.back().count_; - break; - } - } }; using csv_stream_encoder = basic_csv_encoder; From b992695d0cb75293b59ac2e9f80f36e489489a0f Mon Sep 17 00:00:00 2001 From: Daniel Parker Date: Tue, 21 Jan 2025 12:29:52 -0500 Subject: [PATCH 55/79] csv_encoder options --- include/jsoncons_ext/csv/csv_encoder.hpp | 18 ++++++++++-------- test/csv/src/csv_encoder_tests.cpp | 16 ++++++++-------- 2 files changed, 18 insertions(+), 16 deletions(-) diff --git a/include/jsoncons_ext/csv/csv_encoder.hpp b/include/jsoncons_ext/csv/csv_encoder.hpp index 168e442cbf..a8e197ba7a 100644 --- a/include/jsoncons_ext/csv/csv_encoder.hpp +++ b/include/jsoncons_ext/csv/csv_encoder.hpp @@ -99,6 +99,7 @@ class basic_csv_encoder final : public basic_json_visitor Sink sink_; const basic_csv_encode_options options_; + bool has_column_mapping_; allocator_type alloc_; std::vector stack_; @@ -129,6 +130,7 @@ class basic_csv_encoder final : public basic_json_visitor const Allocator& alloc = Allocator()) : sink_(std::forward(sink)), options_(options), + has_column_mapping_(!options.column_mapping().empty()), alloc_(alloc), fp_(options.float_format(), options.precision()), buffer_(alloc), @@ -432,7 +434,7 @@ class basic_csv_encoder final : public basic_json_visitor { if (stack_[0].count_ == 0) { - if (options_.column_mapping().empty()) + if (!has_column_mapping_) { column_paths_.emplace_back(stack_.back().column_path_); column_path_value_map_[stack_.back().column_path_] = std::basic_string(); @@ -645,7 +647,7 @@ class basic_csv_encoder final : public basic_json_visitor stack_.back().column_path_.append(buffer_); if (stack_[0].count_ == 0) { - if (options_.column_mapping().empty()) + if (!has_column_mapping_) { column_paths_.emplace_back(stack_.back().column_path_); if (stack_.back().item_kind_ == stack_item_kind::row) @@ -666,7 +668,7 @@ class basic_csv_encoder final : public basic_json_visitor { if (stack_[0].count_ == 0) { - if (options_.column_mapping().empty()) + if (!has_column_mapping_) { column_paths_.emplace_back(stack_.back().column_path_); } @@ -730,7 +732,7 @@ class basic_csv_encoder final : public basic_json_visitor { if (stack_[0].count_ == 0) { - if (options_.column_mapping().empty()) + if (!has_column_mapping_) { column_paths_.emplace_back(stack_.back().column_path_); } @@ -848,7 +850,7 @@ class basic_csv_encoder final : public basic_json_visitor { if (stack_[0].count_ == 0) { - if (options_.column_mapping().empty()) + if (!has_column_mapping_) { column_paths_.emplace_back(stack_.back().column_path_); } @@ -915,7 +917,7 @@ class basic_csv_encoder final : public basic_json_visitor { if (stack_[0].count_ == 0) { - if (options_.column_mapping().empty()) + if (!has_column_mapping_) { column_paths_.emplace_back(stack_.back().column_path_); } @@ -982,7 +984,7 @@ class basic_csv_encoder final : public basic_json_visitor { if (stack_[0].count_ == 0) { - if (options_.column_mapping().empty()) + if (!has_column_mapping_) { column_paths_.emplace_back(stack_.back().column_path_); } @@ -1046,7 +1048,7 @@ class basic_csv_encoder final : public basic_json_visitor { if (stack_[0].count_ == 0) { - if (options_.column_mapping().empty()) + if (!has_column_mapping_) { column_paths_.emplace_back(stack_.back().column_path_); } diff --git a/test/csv/src/csv_encoder_tests.cpp b/test/csv/src/csv_encoder_tests.cpp index ba8d63ccb3..134ea39446 100644 --- a/test/csv/src/csv_encoder_tests.cpp +++ b/test/csv/src/csv_encoder_tests.cpp @@ -9,7 +9,7 @@ namespace csv = jsoncons::csv; TEST_CASE("test json to flat csv with column mappings") { -#if 0 +//#if 0 SECTION("array of objects to csv") { std::string expected = R"(Number,Date Time @@ -122,12 +122,12 @@ TEST_CASE("test json to flat csv with column mappings") CHECK(expected == buf); } -#endif +//#endif } TEST_CASE("test json to flat csv") { -#if 0 +//#if 0 SECTION("array of objects to csv") { std::string expected = R"(boolean,datetime,float,text @@ -342,7 +342,7 @@ NY,LON,TOR;LON //std::cout << buf << "\n"; CHECK(expected == buf); } -#endif +//#endif SECTION("object of arrays and subarrays to csv") { @@ -377,7 +377,7 @@ NY,LON,TOR;LON TEST_CASE("test json to non-flat csv with column mappings") { -#if 0 +//#if 0 SECTION("array of objects to csv") { std::string expected = R"(Number,Date Time @@ -490,12 +490,12 @@ TEST_CASE("test json to non-flat csv with column mappings") CHECK(expected == buf); } -#endif +//#endif } TEST_CASE("test json to non-flat csv") { -#if 0 +//#if 0 SECTION("array of objects to csv") { std::string expected = R"(/boolean,/datetime,/float,/nested/nested/date,/nested/nested/integer,/nested/time,/text @@ -686,6 +686,6 @@ NY,LON,TOR;LON CHECK(expected == buf); } -#endif +//#endif } From 2e3389e0af7a254b59ea1f168dc1d96ce2b9d7a8 Mon Sep 17 00:00:00 2001 From: Daniel Parker Date: Tue, 21 Jan 2025 12:37:33 -0500 Subject: [PATCH 56/79] csv_encoder options --- include/jsoncons_ext/csv/csv_encoder.hpp | 88 +++++++++++++----------- 1 file changed, 48 insertions(+), 40 deletions(-) diff --git a/include/jsoncons_ext/csv/csv_encoder.hpp b/include/jsoncons_ext/csv/csv_encoder.hpp index a8e197ba7a..79aad84533 100644 --- a/include/jsoncons_ext/csv/csv_encoder.hpp +++ b/include/jsoncons_ext/csv/csv_encoder.hpp @@ -99,7 +99,11 @@ class basic_csv_encoder final : public basic_json_visitor Sink sink_; const basic_csv_encode_options options_; + bool flat_; bool has_column_mapping_; + char_type field_delimiter_; + char_type subfield_delimiter_; + string_type line_delimiter_; allocator_type alloc_; std::vector stack_; @@ -130,7 +134,11 @@ class basic_csv_encoder final : public basic_json_visitor const Allocator& alloc = Allocator()) : sink_(std::forward(sink)), options_(options), + flat_(options.flat()), has_column_mapping_(!options.column_mapping().empty()), + field_delimiter_(options.field_delimiter()), + subfield_delimiter_(options.subfield_delimiter()), + line_delimiter_(options.line_delimiter()), alloc_(alloc), fp_(options.float_format(), options.precision()), buffer_(alloc), @@ -225,7 +233,7 @@ class basic_csv_encoder final : public basic_json_visitor stack_.emplace_back(stack_item_kind::object); break; case stack_item_kind::flat_object: - if (options_.subfield_delimiter() == char_type()) + if (subfield_delimiter_ == char_type()) { stack_.emplace_back(stack_item_kind::unmapped); } @@ -273,19 +281,19 @@ class basic_csv_encoder final : public basic_json_visitor { if (col > 0) { - sink_.push_back(options_.field_delimiter()); + sink_.push_back(field_delimiter_); } sink_.append(it->second.data(), it->second.length()); ++col; } } - sink_.append(options_.line_delimiter().data(), options_.line_delimiter().length()); + sink_.append(line_delimiter_.data(), line_delimiter_.length()); } for (std::size_t i = 0; i < column_paths_.size(); ++i) { if (i > 0) { - sink_.push_back(options_.field_delimiter()); + sink_.push_back(field_delimiter_); } auto it = column_path_value_map_.find(column_paths_[i]); if (it != column_path_value_map_.end()) @@ -294,7 +302,7 @@ class basic_csv_encoder final : public basic_json_visitor it->second.clear(); } } - sink_.append(options_.line_delimiter().data(), options_.line_delimiter().length()); + sink_.append(line_delimiter_.data(), line_delimiter_.length()); } break; case stack_item_kind::column_mapping: @@ -304,12 +312,12 @@ class basic_csv_encoder final : public basic_json_visitor { if (!first) { - sink_.push_back(options_.field_delimiter()); + sink_.push_back(field_delimiter_); first = false; } sink_.append(item.data(), item.size()); } - sink_.append(options_.line_delimiter().data(), options_.line_delimiter().length()); + sink_.append(line_delimiter_.data(), line_delimiter_.length()); std::vector> columns; for (const auto& item : column_path_column_map_) @@ -341,11 +349,11 @@ class basic_csv_encoder final : public basic_json_visitor { for (std::size_t i = 0; i < missing_cols; ++i) { - sink_.push_back(options_.field_delimiter()); + sink_.push_back(field_delimiter_); } if (!first) { - sink_.push_back(options_.field_delimiter()); + sink_.push_back(field_delimiter_); } else { @@ -357,7 +365,7 @@ class basic_csv_encoder final : public basic_json_visitor } if (!done) { - sink_.append(options_.line_delimiter().data(), options_.line_delimiter().length()); + sink_.append(line_delimiter_.data(), line_delimiter_.length()); } } } @@ -384,7 +392,7 @@ class basic_csv_encoder final : public basic_json_visitor { if (stack_.empty()) { - if (options_.flat()) + if (flat_) { stack_.emplace_back(stack_item_kind::flat_row_mapping); } @@ -410,7 +418,7 @@ class basic_csv_encoder final : public basic_json_visitor stack_.emplace_back(stack_item_kind::row); break; case stack_item_kind::flat_row: - if (options_.subfield_delimiter() == char_type()) + if (subfield_delimiter_ == char_type()) { stack_.emplace_back(stack_item_kind::unmapped); } @@ -426,7 +434,7 @@ class basic_csv_encoder final : public basic_json_visitor } break; case stack_item_kind::flat_object: - if (options_.subfield_delimiter() == char_type()) + if (subfield_delimiter_ == char_type()) { stack_.emplace_back(stack_item_kind::unmapped); } @@ -495,20 +503,20 @@ class basic_csv_encoder final : public basic_json_visitor { if (col > 0) { - sink_.push_back(options_.field_delimiter()); + sink_.push_back(field_delimiter_); } sink_.append(it->second.data(), it->second.length()); ++col; } } - sink_.append(options_.line_delimiter().data(), options_.line_delimiter().length()); + sink_.append(line_delimiter_.data(), line_delimiter_.length()); } for (std::size_t i = 0; i < column_paths_.size(); ++i) { if (i > 0) { - sink_.push_back(options_.field_delimiter()); + sink_.push_back(field_delimiter_); } auto it = column_path_value_map_.find(column_paths_[i]); if (it != column_path_value_map_.end()) @@ -517,7 +525,7 @@ class basic_csv_encoder final : public basic_json_visitor it->second.clear(); } } - sink_.append(options_.line_delimiter().data(), options_.line_delimiter().length()); + sink_.append(line_delimiter_.data(), line_delimiter_.length()); } break; case stack_item_kind::multivalued_field: @@ -542,21 +550,21 @@ class basic_csv_encoder final : public basic_json_visitor { if (col > 0) { - sink_.push_back(options_.field_delimiter()); + sink_.push_back(field_delimiter_); } sink_.append(it->second.data(), it->second.length()); ++col; } } - sink_.append(options_.line_delimiter().data(), - options_.line_delimiter().length()); + sink_.append(line_delimiter_.data(), + line_delimiter_.length()); } for (std::size_t i = 0; i < column_paths_.size(); ++i) { if (i > 0) { - sink_.push_back(options_.field_delimiter()); + sink_.push_back(field_delimiter_); } auto it = column_path_value_map_.find(column_paths_[i]); if (it != column_path_value_map_.end()) @@ -565,7 +573,7 @@ class basic_csv_encoder final : public basic_json_visitor it->second.clear(); } } - sink_.append(options_.line_delimiter().data(), options_.line_delimiter().length()); + sink_.append(line_delimiter_.data(), line_delimiter_.length()); } break; case stack_item_kind::column: @@ -625,7 +633,7 @@ class basic_csv_encoder final : public basic_json_visitor } else { - column_paths_[0].push_back(options_.field_delimiter()); + column_paths_[0].push_back(field_delimiter_); column_paths_[0].append(string_type(name)); } column_it_ = column_path_column_map_.emplace(string_type(name), column_type{}).first; @@ -677,9 +685,9 @@ class basic_csv_encoder final : public basic_json_visitor auto it = column_path_value_map_.find(stack_.back().column_path_); if (it != column_path_value_map_.end()) { - if (!it->second.empty() && options_.subfield_delimiter() != char_type()) + if (!it->second.empty() && subfield_delimiter_ != char_type()) { - it->second.push_back(options_.subfield_delimiter()); + it->second.push_back(subfield_delimiter_); } write_null_value(it->second); } @@ -705,7 +713,7 @@ class basic_csv_encoder final : public basic_json_visitor { if (!value_buffer_.empty()) { - value_buffer_.push_back(options_.subfield_delimiter()); + value_buffer_.push_back(subfield_delimiter_); } write_null_value(value_buffer_); break; @@ -765,7 +773,7 @@ class basic_csv_encoder final : public basic_json_visitor { if (!value_buffer_.empty()) { - value_buffer_.push_back(options_.subfield_delimiter()); + value_buffer_.push_back(subfield_delimiter_); } write_string_value(sv, value_buffer_); break; @@ -859,9 +867,9 @@ class basic_csv_encoder final : public basic_json_visitor auto it = column_path_value_map_.find(stack_.back().column_path_); if (it != column_path_value_map_.end()) { - if (!it->second.empty() && options_.subfield_delimiter() != char_type()) + if (!it->second.empty() && subfield_delimiter_ != char_type()) { - it->second.push_back(options_.subfield_delimiter()); + it->second.push_back(subfield_delimiter_); } write_double_value(val, context, it->second, ec); } @@ -887,7 +895,7 @@ class basic_csv_encoder final : public basic_json_visitor { if (!value_buffer_.empty()) { - value_buffer_.push_back(options_.subfield_delimiter()); + value_buffer_.push_back(subfield_delimiter_); } write_double_value(val, context, value_buffer_, ec); break; @@ -926,9 +934,9 @@ class basic_csv_encoder final : public basic_json_visitor auto it = column_path_value_map_.find(stack_.back().column_path_); if (it != column_path_value_map_.end()) { - if (!it->second.empty() && options_.subfield_delimiter() != char_type()) + if (!it->second.empty() && subfield_delimiter_ != char_type()) { - it->second.push_back(options_.subfield_delimiter()); + it->second.push_back(subfield_delimiter_); } write_int64_value(val, it->second); } @@ -954,7 +962,7 @@ class basic_csv_encoder final : public basic_json_visitor { if (!value_buffer_.empty()) { - value_buffer_.push_back(options_.subfield_delimiter()); + value_buffer_.push_back(subfield_delimiter_); } write_int64_value(val, value_buffer_); break; @@ -993,9 +1001,9 @@ class basic_csv_encoder final : public basic_json_visitor auto it = column_path_value_map_.find(stack_.back().column_path_); if (it != column_path_value_map_.end()) { - if (!it->second.empty() && options_.subfield_delimiter() != char_type()) + if (!it->second.empty() && subfield_delimiter_ != char_type()) { - it->second.push_back(options_.subfield_delimiter()); + it->second.push_back(subfield_delimiter_); } write_uint64_value(val, it->second); } @@ -1021,7 +1029,7 @@ class basic_csv_encoder final : public basic_json_visitor { if (!value_buffer_.empty()) { - value_buffer_.push_back(options_.subfield_delimiter()); + value_buffer_.push_back(subfield_delimiter_); } write_uint64_value(val, value_buffer_); break; @@ -1057,9 +1065,9 @@ class basic_csv_encoder final : public basic_json_visitor auto it = column_path_value_map_.find(stack_.back().column_path_); if (it != column_path_value_map_.end()) { - if (!it->second.empty() && options_.subfield_delimiter() != char_type()) + if (!it->second.empty() && subfield_delimiter_ != char_type()) { - it->second.push_back(options_.subfield_delimiter()); + it->second.push_back(subfield_delimiter_); } write_bool_value(val, it->second); } @@ -1085,7 +1093,7 @@ class basic_csv_encoder final : public basic_json_visitor { if (!value_buffer_.empty()) { - value_buffer_.push_back(options_.subfield_delimiter()); + value_buffer_.push_back(subfield_delimiter_); } write_bool_value(val, value_buffer_); break; @@ -1107,7 +1115,7 @@ class basic_csv_encoder final : public basic_json_visitor bool quote = false; if (options_.quote_style() == quote_style_kind::all || options_.quote_style() == quote_style_kind::nonnumeric || (options_.quote_style() == quote_style_kind::minimal && - (std::char_traits::find(s, length, options_.field_delimiter()) != nullptr || std::char_traits::find(s, length, options_.quote_char()) != nullptr))) + (std::char_traits::find(s, length, field_delimiter_) != nullptr || std::char_traits::find(s, length, options_.quote_char()) != nullptr))) { quote = true; str.push_back(options_.quote_char()); From d7c41d4e7db66ab2110983c755c5f4d1a87469fb Mon Sep 17 00:00:00 2001 From: Daniel Parker Date: Tue, 21 Jan 2025 12:43:20 -0500 Subject: [PATCH 57/79] csv_encoder options --- include/jsoncons_ext/csv/csv_encoder.hpp | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/include/jsoncons_ext/csv/csv_encoder.hpp b/include/jsoncons_ext/csv/csv_encoder.hpp index 79aad84533..b4250e8723 100644 --- a/include/jsoncons_ext/csv/csv_encoder.hpp +++ b/include/jsoncons_ext/csv/csv_encoder.hpp @@ -104,6 +104,9 @@ class basic_csv_encoder final : public basic_json_visitor char_type field_delimiter_; char_type subfield_delimiter_; string_type line_delimiter_; + quote_style_kind quote_style_; + char_type quote_char_; + char_type quote_escape_char_; allocator_type alloc_; std::vector stack_; @@ -139,6 +142,9 @@ class basic_csv_encoder final : public basic_json_visitor field_delimiter_(options.field_delimiter()), subfield_delimiter_(options.subfield_delimiter()), line_delimiter_(options.line_delimiter()), + quote_style_(options.quote_style()), + quote_char_(options.quote_char()), + quote_escape_char_(options.quote_escape_char()), alloc_(alloc), fp_(options.float_format(), options.precision()), buffer_(alloc), @@ -1113,17 +1119,17 @@ class basic_csv_encoder final : public basic_json_visitor bool do_string_value(const CharT* s, std::size_t length, string_type& str) { bool quote = false; - if (options_.quote_style() == quote_style_kind::all || options_.quote_style() == quote_style_kind::nonnumeric || - (options_.quote_style() == quote_style_kind::minimal && - (std::char_traits::find(s, length, field_delimiter_) != nullptr || std::char_traits::find(s, length, options_.quote_char()) != nullptr))) + if (quote_style_ == quote_style_kind::all || quote_style_ == quote_style_kind::nonnumeric || + (quote_style_ == quote_style_kind::minimal && + (std::char_traits::find(s, length, field_delimiter_) != nullptr || std::char_traits::find(s, length, quote_char_) != nullptr))) { quote = true; - str.push_back(options_.quote_char()); + str.push_back(quote_char_); } - escape_string(s, length, options_.quote_char(), options_.quote_escape_char(), str); + escape_string(s, length, quote_char_, quote_escape_char_, str); if (quote) { - str.push_back(options_.quote_char()); + str.push_back(quote_char_); } return true; From 702da33ee2bd60c9dc00308cca9e75171854cace Mon Sep 17 00:00:00 2001 From: Daniel Parker Date: Tue, 21 Jan 2025 13:06:02 -0500 Subject: [PATCH 58/79] csv_encoder options --- include/jsoncons_ext/csv/csv_encoder.hpp | 56 +++++++++++++++++------- 1 file changed, 40 insertions(+), 16 deletions(-) diff --git a/include/jsoncons_ext/csv/csv_encoder.hpp b/include/jsoncons_ext/csv/csv_encoder.hpp index b4250e8723..8570a5ace4 100644 --- a/include/jsoncons_ext/csv/csv_encoder.hpp +++ b/include/jsoncons_ext/csv/csv_encoder.hpp @@ -98,15 +98,27 @@ class basic_csv_encoder final : public basic_json_visitor }; Sink sink_; - const basic_csv_encode_options options_; bool flat_; bool has_column_mapping_; + bool has_column_names_; char_type field_delimiter_; char_type subfield_delimiter_; string_type line_delimiter_; quote_style_kind quote_style_; char_type quote_char_; char_type quote_escape_char_; + bool enable_nan_to_num_; + string_type nan_to_num_; + bool enable_nan_to_str_; + string_type nan_to_str_; + bool enable_inf_to_num_; + string_type inf_to_num_; + bool enable_inf_to_str_; + string_type inf_to_str_; + bool enable_neginf_to_num_; + string_type neginf_to_num_; + bool enable_neginf_to_str_; + string_type neginf_to_str_; allocator_type alloc_; std::vector stack_; @@ -136,15 +148,27 @@ class basic_csv_encoder final : public basic_json_visitor const basic_csv_encode_options& options, const Allocator& alloc = Allocator()) : sink_(std::forward(sink)), - options_(options), flat_(options.flat()), has_column_mapping_(!options.column_mapping().empty()), + has_column_names_(!options.column_names().empty()), field_delimiter_(options.field_delimiter()), subfield_delimiter_(options.subfield_delimiter()), line_delimiter_(options.line_delimiter()), quote_style_(options.quote_style()), quote_char_(options.quote_char()), quote_escape_char_(options.quote_escape_char()), + enable_nan_to_num_(options.enable_nan_to_num()), + nan_to_num_(options.nan_to_num()), + enable_nan_to_str_(options.enable_nan_to_str()), + nan_to_str_(options.nan_to_str()), + enable_inf_to_num_(options.enable_inf_to_num()), + inf_to_num_(options.inf_to_num()), + enable_inf_to_str_(options.enable_inf_to_str()), + inf_to_str_(options.inf_to_str()), + enable_neginf_to_num_(options.enable_neginf_to_num()), + neginf_to_num_(options.neginf_to_num()), + enable_neginf_to_str_(options.enable_neginf_to_str()), + neginf_to_str_(options.neginf_to_str()), alloc_(alloc), fp_(options.float_format(), options.precision()), buffer_(alloc), @@ -614,7 +638,7 @@ class basic_csv_encoder final : public basic_json_visitor stack_.back().column_path_ = stack_[stack_.size()-2].column_path_; stack_.back().column_path_.push_back('/'); stack_.back().column_path_.append(std::string(name)); - if (options_.column_names().empty()) + if (!has_column_names_) { column_path_name_map_.emplace(stack_.back().column_path_, name); } @@ -625,7 +649,7 @@ class basic_csv_encoder final : public basic_json_visitor stack_.back().column_path_ = stack_[stack_.size()-2].column_path_; stack_.back().column_path_.push_back('/'); stack_.back().column_path_.append(std::string(name)); - if (options_.column_names().empty()) + if (!has_column_names_) { column_path_name_map_.emplace(stack_.back().column_path_, stack_.back().column_path_); } @@ -1147,13 +1171,13 @@ class basic_csv_encoder final : public basic_json_visitor { if ((std::isnan)(val)) { - if (options_.enable_nan_to_num()) + if (enable_nan_to_num_) { - str.append(options_.nan_to_num().data(), options_.nan_to_num().length()); + str.append(nan_to_num_.data(), nan_to_num_.length()); } - else if (options_.enable_nan_to_str()) + else if (enable_nan_to_str_) { - visit_string(options_.nan_to_str(), semantic_tag::none, context, ec); + visit_string(nan_to_str_, semantic_tag::none, context, ec); } else { @@ -1162,13 +1186,13 @@ class basic_csv_encoder final : public basic_json_visitor } else if (val == std::numeric_limits::infinity()) { - if (options_.enable_inf_to_num()) + if (enable_inf_to_num_) { - str.append(options_.inf_to_num().data(), options_.inf_to_num().length()); + str.append(inf_to_num_.data(), inf_to_num_.length()); } - else if (options_.enable_inf_to_str()) + else if (enable_inf_to_str_) { - visit_string(options_.inf_to_str(), semantic_tag::none, context, ec); + visit_string(inf_to_str_, semantic_tag::none, context, ec); } else { @@ -1177,13 +1201,13 @@ class basic_csv_encoder final : public basic_json_visitor } else { - if (options_.enable_neginf_to_num()) + if (enable_neginf_to_num_) { - str.append(options_.neginf_to_num().data(), options_.neginf_to_num().length()); + str.append(neginf_to_num_.data(), neginf_to_num_.length()); } - else if (options_.enable_neginf_to_str()) + else if (enable_neginf_to_str_) { - visit_string(options_.neginf_to_str(), semantic_tag::none, context, ec); + visit_string(neginf_to_str_, semantic_tag::none, context, ec); } else { From 54fd0e3baf0c4da07f2aa53c2ef8ea1702d3fc63 Mon Sep 17 00:00:00 2001 From: Daniel Parker Date: Tue, 21 Jan 2025 17:14:49 -0500 Subject: [PATCH 59/79] csv_options column_mapping --- include/jsoncons_ext/csv/csv_encoder.hpp | 165 +++++++++++------------ test/csv/src/csv_encoder_tests.cpp | 16 +-- 2 files changed, 89 insertions(+), 92 deletions(-) diff --git a/include/jsoncons_ext/csv/csv_encoder.hpp b/include/jsoncons_ext/csv/csv_encoder.hpp index 8570a5ace4..57d3fc531a 100644 --- a/include/jsoncons_ext/csv/csv_encoder.hpp +++ b/include/jsoncons_ext/csv/csv_encoder.hpp @@ -78,7 +78,6 @@ class basic_csv_encoder final : public basic_json_visitor { stack_item_kind item_kind_; std::size_t count_{0}; - std::string pathname_; std::string column_path_; stack_item(stack_item_kind item_kind) noexcept @@ -176,7 +175,7 @@ class basic_csv_encoder final : public basic_json_visitor column_path_column_map_(alloc), column_it_(column_path_column_map_.end()) { - if (!options.column_mapping().empty()) + if (has_column_mapping_) { for (const auto& item : options.column_mapping()) { @@ -184,7 +183,7 @@ class basic_csv_encoder final : public basic_json_visitor column_path_name_map_.emplace(item.first, item.second); } } - else if (!options.column_names().empty()) + else if (has_column_names_) { jsoncons::csv::detail::parse_column_names(options.column_names(), column_names_); } @@ -269,7 +268,6 @@ class basic_csv_encoder final : public basic_json_visitor } else { - stack_.back().pathname_ = stack_[stack_.size()-2].pathname_; stack_.back().column_path_ = stack_[stack_.size()-2].column_path_; value_buffer_.clear(); stack_.emplace_back(stack_item_kind::multivalued_field); @@ -290,10 +288,7 @@ class basic_csv_encoder final : public basic_json_visitor bool visit_end_object(const ser_context&, std::error_code& ec) override { - if (stack_.empty()) - { - return true; - } + JSONCONS_ASSERT(!stack_.empty()); switch (stack_.back().item_kind_) { @@ -336,71 +331,78 @@ class basic_csv_encoder final : public basic_json_visitor } break; case stack_item_kind::column_mapping: - { - bool first = true; - for (const auto& item : column_paths_) - { - if (!first) - { - sink_.push_back(field_delimiter_); - first = false; - } - sink_.append(item.data(), item.size()); - } - sink_.append(line_delimiter_.data(), line_delimiter_.length()); - - std::vector> columns; - for (const auto& item : column_path_column_map_) - { - columns.emplace_back(item.second.cbegin(), item.second.cend()); - } + { + // Write header + { + bool first = true; + for (std::size_t i = 0; i < column_paths_.size(); ++i) + { + auto it = column_path_name_map_.find(column_paths_[i]); + if (it != column_path_name_map_.end()) + { + if (!first) + { + sink_.push_back(field_delimiter_); + } + sink_.append(it->second.data(), it->second.length()); + first = false; + } + } + sink_.append(line_delimiter_.data(), line_delimiter_.length()); + } - if (!columns.empty()) - { - const std::size_t no_cols = columns.size(); - bool done = false; - while (!done) - { - std::size_t missing_cols = 0; - - first = true; - for (auto& item : columns) - { - if (item.first == item.second) - { - ++missing_cols; - if (missing_cols == no_cols) - { - done = true; - break; - } - } - else - { - for (std::size_t i = 0; i < missing_cols; ++i) - { - sink_.push_back(field_delimiter_); - } - if (!first) - { - sink_.push_back(field_delimiter_); - } - else - { - first = false; - } - sink_.append((*(item.first)).data(), (*(item.first)).size()); - ++item.first; - } - } - if (!done) - { - sink_.append(line_delimiter_.data(), line_delimiter_.length()); - } - } - } - break; - } + std::vector> columns; + for (const auto& item : column_path_column_map_) + { + columns.emplace_back(item.second.cbegin(), item.second.cend()); + } + + if (!columns.empty()) + { + const std::size_t no_cols = columns.size(); + bool done = false; + while (!done) + { + std::size_t missing_cols = 0; + + bool first = true; + for (auto& item : columns) + { + if (item.first == item.second) + { + ++missing_cols; + if (missing_cols == no_cols) + { + done = true; + break; + } + } + else + { + for (std::size_t i = 0; i < missing_cols; ++i) + { + sink_.push_back(field_delimiter_); + } + if (!first) + { + sink_.push_back(field_delimiter_); + } + else + { + first = false; + } + sink_.append((*(item.first)).data(), (*(item.first)).size()); + ++item.first; + } + } + if (!done) + { + sink_.append(line_delimiter_.data(), line_delimiter_.length()); + } + } + } + break; + } case stack_item_kind::column_multivalued_field: break; case stack_item_kind::unmapped: @@ -510,10 +512,7 @@ class basic_csv_encoder final : public basic_json_visitor bool visit_end_array(const ser_context&, std::error_code& ec) override { - if (stack_.empty()) - { - return true; - } + JSONCONS_ASSERT(!stack_.empty()); switch (stack_.back().item_kind_) { @@ -657,16 +656,14 @@ class basic_csv_encoder final : public basic_json_visitor } case stack_item_kind::column_mapping: { - if (column_paths_.empty()) - { - column_paths_.emplace_back(name); - } - else + stack_.back().column_path_.push_back('/'); + stack_.back().column_path_.append(std::string(name)); + if (!has_column_names_) { - column_paths_[0].push_back(field_delimiter_); - column_paths_[0].append(string_type(name)); + column_path_name_map_.emplace(stack_.back().column_path_, name); } - column_it_ = column_path_column_map_.emplace(string_type(name), column_type{}).first; + column_paths_.emplace_back(stack_.back().column_path_); + column_it_ = column_path_column_map_.emplace(stack_.back().column_path_, column_type{}).first; break; } default: diff --git a/test/csv/src/csv_encoder_tests.cpp b/test/csv/src/csv_encoder_tests.cpp index 134ea39446..ba8d63ccb3 100644 --- a/test/csv/src/csv_encoder_tests.cpp +++ b/test/csv/src/csv_encoder_tests.cpp @@ -9,7 +9,7 @@ namespace csv = jsoncons::csv; TEST_CASE("test json to flat csv with column mappings") { -//#if 0 +#if 0 SECTION("array of objects to csv") { std::string expected = R"(Number,Date Time @@ -122,12 +122,12 @@ TEST_CASE("test json to flat csv with column mappings") CHECK(expected == buf); } -//#endif +#endif } TEST_CASE("test json to flat csv") { -//#if 0 +#if 0 SECTION("array of objects to csv") { std::string expected = R"(boolean,datetime,float,text @@ -342,7 +342,7 @@ NY,LON,TOR;LON //std::cout << buf << "\n"; CHECK(expected == buf); } -//#endif +#endif SECTION("object of arrays and subarrays to csv") { @@ -377,7 +377,7 @@ NY,LON,TOR;LON TEST_CASE("test json to non-flat csv with column mappings") { -//#if 0 +#if 0 SECTION("array of objects to csv") { std::string expected = R"(Number,Date Time @@ -490,12 +490,12 @@ TEST_CASE("test json to non-flat csv with column mappings") CHECK(expected == buf); } -//#endif +#endif } TEST_CASE("test json to non-flat csv") { -//#if 0 +#if 0 SECTION("array of objects to csv") { std::string expected = R"(/boolean,/datetime,/float,/nested/nested/date,/nested/nested/integer,/nested/time,/text @@ -686,6 +686,6 @@ NY,LON,TOR;LON CHECK(expected == buf); } -//#endif +#endif } From 67367c267b686f1f91f434baf347582499df3140 Mon Sep 17 00:00:00 2001 From: Daniel Parker Date: Tue, 21 Jan 2025 17:47:49 -0500 Subject: [PATCH 60/79] csv_encoder options --- include/jsoncons_ext/csv/csv_encoder.hpp | 17 +++++++----- test/csv/src/csv_encoder_tests.cpp | 33 ++++++++++++++++++++++++ 2 files changed, 44 insertions(+), 6 deletions(-) diff --git a/include/jsoncons_ext/csv/csv_encoder.hpp b/include/jsoncons_ext/csv/csv_encoder.hpp index 57d3fc531a..56501f0ea3 100644 --- a/include/jsoncons_ext/csv/csv_encoder.hpp +++ b/include/jsoncons_ext/csv/csv_encoder.hpp @@ -352,9 +352,13 @@ class basic_csv_encoder final : public basic_json_visitor } std::vector> columns; - for (const auto& item : column_path_column_map_) + for (const auto& item : column_paths_) { - columns.emplace_back(item.second.cbegin(), item.second.cend()); + auto it = column_path_column_map_.find(item); + if (it != column_path_column_map_.end()) + { + columns.emplace_back((*it).second.cbegin(), (*it).second.cend()); + } } if (!columns.empty()) @@ -637,7 +641,7 @@ class basic_csv_encoder final : public basic_json_visitor stack_.back().column_path_ = stack_[stack_.size()-2].column_path_; stack_.back().column_path_.push_back('/'); stack_.back().column_path_.append(std::string(name)); - if (!has_column_names_) + if (!has_column_mapping_) { column_path_name_map_.emplace(stack_.back().column_path_, name); } @@ -648,7 +652,7 @@ class basic_csv_encoder final : public basic_json_visitor stack_.back().column_path_ = stack_[stack_.size()-2].column_path_; stack_.back().column_path_.push_back('/'); stack_.back().column_path_.append(std::string(name)); - if (!has_column_names_) + if (!has_column_mapping_) { column_path_name_map_.emplace(stack_.back().column_path_, stack_.back().column_path_); } @@ -656,13 +660,14 @@ class basic_csv_encoder final : public basic_json_visitor } case stack_item_kind::column_mapping: { + stack_.back().column_path_.erase(); stack_.back().column_path_.push_back('/'); stack_.back().column_path_.append(std::string(name)); - if (!has_column_names_) + if (!has_column_mapping_) { column_path_name_map_.emplace(stack_.back().column_path_, name); + column_paths_.emplace_back(stack_.back().column_path_); } - column_paths_.emplace_back(stack_.back().column_path_); column_it_ = column_path_column_map_.emplace(stack_.back().column_path_, column_type{}).first; break; } diff --git a/test/csv/src/csv_encoder_tests.cpp b/test/csv/src/csv_encoder_tests.cpp index ba8d63ccb3..86341025e9 100644 --- a/test/csv/src/csv_encoder_tests.cpp +++ b/test/csv/src/csv_encoder_tests.cpp @@ -373,6 +373,39 @@ NY,LON,TOR;LON //std::cout << buf << "\n"; CHECK(expected == buf); } + + SECTION("object of arrays and subarrays to csv with column mapping") + { + std::string expected = R"(B,A +7;8;9,1;true;null +10;11;12,-4;5.5;6 +)"; + + const std::string jtext = R"( +{ + "a" : [[1,true,null],[-4,5.5,"6"]], + "b" : [[7,8,9],[10,11,12]], + "c" : [15,16,17] +} + )"; + + auto j = jsoncons::json::parse(jtext); + //std::cout << pretty_print(j) << "\n"; + + auto options = csv::csv_options{} + .subfield_delimiter(';') + .column_mapping({ + {"/b","B"}, + {"/a","A"} + }); + + std::string buf; + csv::csv_string_encoder encoder(buf, options); + j.dump(encoder); + + //std::cout << buf << "\n"; + CHECK(expected == buf); + } } TEST_CASE("test json to non-flat csv with column mappings") From a781bf12d7d59acbfd361641a3f36f90fef450ce Mon Sep 17 00:00:00 2001 From: Daniel Parker Date: Wed, 22 Jan 2025 10:35:53 -0500 Subject: [PATCH 61/79] csv_options column_names with missing column --- include/jsoncons_ext/csv/csv_encoder.hpp | 105 +++++++++++++---------- test/csv/src/csv_encoder_tests.cpp | 101 ++++++++++++++++++++-- test/csv/src/csv_tests.cpp | 6 +- 3 files changed, 156 insertions(+), 56 deletions(-) diff --git a/include/jsoncons_ext/csv/csv_encoder.hpp b/include/jsoncons_ext/csv/csv_encoder.hpp index 56501f0ea3..85b6465aa9 100644 --- a/include/jsoncons_ext/csv/csv_encoder.hpp +++ b/include/jsoncons_ext/csv/csv_encoder.hpp @@ -186,6 +186,15 @@ class basic_csv_encoder final : public basic_json_visitor else if (has_column_names_) { jsoncons::csv::detail::parse_column_names(options.column_names(), column_names_); + for (const auto& item : column_names_) + { + string_type str{alloc_}; + str.push_back('/'); + str.append(item.data(), item.size()); + column_paths_.emplace_back(str); + column_path_name_map_.emplace(std::move(str), item); + } + has_column_mapping_ = true; } } @@ -253,6 +262,7 @@ class basic_csv_encoder final : public basic_json_visitor switch (stack_.back().item_kind_) { case stack_item_kind::flat_row_mapping: + column_index_ = 0; stack_.emplace_back(stack_item_kind::flat_object); break; case stack_item_kind::row_mapping: @@ -298,21 +308,44 @@ class basic_csv_encoder final : public basic_json_visitor { if (stack_[0].count_ == 0) { - std::size_t col = 0; - for (std::size_t i = 0; i < column_paths_.size(); ++i) + if (has_column_names_) { - auto it = column_path_name_map_.find(column_paths_[i]); - if (it != column_path_name_map_.end()) + bool first = true; + for (const auto& item : column_names_) { - if (col > 0) + if (!first) { sink_.push_back(field_delimiter_); } - sink_.append(it->second.data(), it->second.length()); - ++col; + else + { + first = false; + } + sink_.append(item.data(), item.length()); } + sink_.append(line_delimiter_.data(), line_delimiter_.length()); + } + else + { + bool first = true; + for (std::size_t i = 0; i < column_paths_.size(); ++i) + { + auto it = column_path_name_map_.find(column_paths_[i]); + if (it != column_path_name_map_.end()) + { + if (!first) + { + sink_.push_back(field_delimiter_); + } + else + { + first = false; + } + sink_.append(it->second.data(), it->second.length()); + } + } + sink_.append(line_delimiter_.data(), line_delimiter_.length()); } - sink_.append(line_delimiter_.data(), line_delimiter_.length()); } for (std::size_t i = 0; i < column_paths_.size(); ++i) { @@ -714,14 +747,10 @@ class basic_csv_encoder final : public basic_json_visitor } column_path_value_map_[stack_.back().column_path_] = std::basic_string(); } - auto it = column_path_value_map_.find(stack_.back().column_path_); - if (it != column_path_value_map_.end()) + if (column_path_name_map_.find(stack_.back().column_path_) != column_path_name_map_.end()) { - if (!it->second.empty() && subfield_delimiter_ != char_type()) - { - it->second.push_back(subfield_delimiter_); - } - write_null_value(it->second); + column_path_value_map_[stack_.back().column_path_] = std::basic_string(); + write_null_value(column_path_value_map_[stack_.back().column_path_]); } break; } @@ -776,11 +805,11 @@ class basic_csv_encoder final : public basic_json_visitor { column_paths_.emplace_back(stack_.back().column_path_); } - column_path_value_map_[stack_.back().column_path_] = std::basic_string(); } - auto it = column_path_value_map_.find(stack_.back().column_path_); - if (it != column_path_value_map_.end()) + + if (column_path_name_map_.find(stack_.back().column_path_) != column_path_name_map_.end()) { + column_path_value_map_[stack_.back().column_path_] = std::basic_string(); write_string_value(sv, column_path_value_map_[stack_.back().column_path_]); } break; @@ -896,14 +925,10 @@ class basic_csv_encoder final : public basic_json_visitor } column_path_value_map_[stack_.back().column_path_] = std::basic_string(); } - auto it = column_path_value_map_.find(stack_.back().column_path_); - if (it != column_path_value_map_.end()) + if (column_path_name_map_.find(stack_.back().column_path_) != column_path_name_map_.end()) { - if (!it->second.empty() && subfield_delimiter_ != char_type()) - { - it->second.push_back(subfield_delimiter_); - } - write_double_value(val, context, it->second, ec); + column_path_value_map_[stack_.back().column_path_] = std::basic_string(); + write_double_value(val, context, column_path_value_map_[stack_.back().column_path_], ec); } break; } @@ -963,14 +988,10 @@ class basic_csv_encoder final : public basic_json_visitor } column_path_value_map_[stack_.back().column_path_] = std::basic_string(); } - auto it = column_path_value_map_.find(stack_.back().column_path_); - if (it != column_path_value_map_.end()) + if (column_path_name_map_.find(stack_.back().column_path_) != column_path_name_map_.end()) { - if (!it->second.empty() && subfield_delimiter_ != char_type()) - { - it->second.push_back(subfield_delimiter_); - } - write_int64_value(val, it->second); + column_path_value_map_[stack_.back().column_path_] = std::basic_string(); + write_int64_value(val, column_path_value_map_[stack_.back().column_path_]); } break; } @@ -1030,14 +1051,10 @@ class basic_csv_encoder final : public basic_json_visitor } column_path_value_map_[stack_.back().column_path_] = std::basic_string(); } - auto it = column_path_value_map_.find(stack_.back().column_path_); - if (it != column_path_value_map_.end()) + if (column_path_name_map_.find(stack_.back().column_path_) != column_path_name_map_.end()) { - if (!it->second.empty() && subfield_delimiter_ != char_type()) - { - it->second.push_back(subfield_delimiter_); - } - write_uint64_value(val, it->second); + column_path_value_map_[stack_.back().column_path_] = std::basic_string(); + write_uint64_value(val, column_path_value_map_[stack_.back().column_path_]); } break; } @@ -1094,14 +1111,10 @@ class basic_csv_encoder final : public basic_json_visitor } column_path_value_map_[stack_.back().column_path_] = std::basic_string(); } - auto it = column_path_value_map_.find(stack_.back().column_path_); - if (it != column_path_value_map_.end()) + if (column_path_name_map_.find(stack_.back().column_path_) != column_path_name_map_.end()) { - if (!it->second.empty() && subfield_delimiter_ != char_type()) - { - it->second.push_back(subfield_delimiter_); - } - write_bool_value(val, it->second); + column_path_value_map_[stack_.back().column_path_] = std::basic_string(); + write_bool_value(val, column_path_value_map_[stack_.back().column_path_]); } break; } diff --git a/test/csv/src/csv_encoder_tests.cpp b/test/csv/src/csv_encoder_tests.cpp index 86341025e9..bab1f9515d 100644 --- a/test/csv/src/csv_encoder_tests.cpp +++ b/test/csv/src/csv_encoder_tests.cpp @@ -9,7 +9,7 @@ namespace csv = jsoncons::csv; TEST_CASE("test json to flat csv with column mappings") { -#if 0 +//#if 0 SECTION("array of objects to csv") { std::string expected = R"(Number,Date Time @@ -122,12 +122,12 @@ TEST_CASE("test json to flat csv with column mappings") CHECK(expected == buf); } -#endif +//#endif } TEST_CASE("test json to flat csv") { -#if 0 +//#if 0 SECTION("array of objects to csv") { std::string expected = R"(boolean,datetime,float,text @@ -342,7 +342,6 @@ NY,LON,TOR;LON //std::cout << buf << "\n"; CHECK(expected == buf); } -#endif SECTION("object of arrays and subarrays to csv") { @@ -406,11 +405,12 @@ NY,LON,TOR;LON //std::cout << buf << "\n"; CHECK(expected == buf); } +//#endif } TEST_CASE("test json to non-flat csv with column mappings") { -#if 0 +//#if 0 SECTION("array of objects to csv") { std::string expected = R"(Number,Date Time @@ -523,12 +523,12 @@ TEST_CASE("test json to non-flat csv with column mappings") CHECK(expected == buf); } -#endif +//#endif } TEST_CASE("test json to non-flat csv") { -#if 0 +//#if 0 SECTION("array of objects to csv") { std::string expected = R"(/boolean,/datetime,/float,/nested/nested/date,/nested/nested/integer,/nested/time,/text @@ -719,6 +719,91 @@ NY,LON,TOR;LON CHECK(expected == buf); } -#endif +//#endif +} + + +TEST_CASE("test json to flat csv with column names") +{ + SECTION("array of objects to csv") + { + std::string expected = R"(boolean,datetime,float,text +true,1971-01-01T04:14:00,1.0,Chicago Reader +true,1948-01-01T14:57:13,1.27,Chicago Sun-Times +)"; + + std::string jtext = R"( +[ + { + "text": "Chicago Reader", + "float": 1.0, + "datetime": "1971-01-01T04:14:00", + "boolean": true + }, + { + "text": "Chicago Sun-Times", + "float": 1.27, + "datetime": "1948-01-01T14:57:13", + "boolean": true + } +] + )"; + + + auto j = jsoncons::json::parse(jtext); + //std::cout << pretty_print(j) << "\n"; + + auto options = csv::csv_options{} + .flat(true) + .column_names("boolean,datetime,float,text"); + + std::string buf; + csv::csv_string_encoder encoder(buf, options); + j.dump(encoder); + + //std::cout << buf << "\n"; + + CHECK(expected == buf); + } + + SECTION("array of objects to csv with missing name") + { + std::string expected = R"(boolean,datetime,float,text +true,,1.0,Chicago Reader +true,1948-01-01T14:57:13,1.27,Chicago Sun-Times +)"; + + std::string jtext = R"( +[ + { + "text": "Chicago Reader", + "float": 1.0, + "boolean": true + }, + { + "text": "Chicago Sun-Times", + "float": 1.27, + "datetime": "1948-01-01T14:57:13", + "boolean": true + } +] + )"; + + + auto j = jsoncons::json::parse(jtext); + //std::cout << pretty_print(j) << "\n"; + + auto options = csv::csv_options{} + .flat(true) + .column_names("boolean,datetime,float,text"); + + std::string buf; + csv::csv_string_encoder encoder(buf, options); + j.dump(encoder); + + //std::cout << buf << "\n"; + + CHECK(expected == buf); + } } diff --git a/test/csv/src/csv_tests.cpp b/test/csv/src/csv_tests.cpp index c4613c7df8..150ba41bed 100644 --- a/test/csv/src/csv_tests.cpp +++ b/test/csv/src/csv_tests.cpp @@ -971,18 +971,20 @@ TEST_CASE("serialize_tab_delimited_file") json_stream_reader reader(is,decoder); reader.read_next(); ojson employees1 = decoder.get_result(); + + std::cout << pretty_print(employees1) << "\n"; std::stringstream ss; csv::csv_stream_encoder encoder(ss,options); //std::cout << pretty_print(employees1) << '\n'; employees1.dump(encoder); - //std::cout << ss.str() << '\n'; + std::cout << ss.str() << '\n'; json_decoder encoder2; csv::csv_stream_reader reader2(ss,encoder2,options); reader2.read(); ojson employees2 = encoder2.get_result(); - //std::cout << pretty_print(employees2) << '\n'; + std::cout << pretty_print(employees2) << '\n'; CHECK(employees1.size() == employees2.size()); From 1f58fe977b91e8b16028bc01e7a89c0b56d6c135 Mon Sep 17 00:00:00 2001 From: Daniel Parker Date: Wed, 22 Jan 2025 10:47:08 -0500 Subject: [PATCH 62/79] csv_options column_names with missing column --- include/jsoncons_ext/csv/csv_encoder.hpp | 28 +++++++++++++++--------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/include/jsoncons_ext/csv/csv_encoder.hpp b/include/jsoncons_ext/csv/csv_encoder.hpp index 85b6465aa9..bfc14a75cd 100644 --- a/include/jsoncons_ext/csv/csv_encoder.hpp +++ b/include/jsoncons_ext/csv/csv_encoder.hpp @@ -183,18 +183,9 @@ class basic_csv_encoder final : public basic_json_visitor column_path_name_map_.emplace(item.first, item.second); } } - else if (has_column_names_) + if (has_column_names_) { jsoncons::csv::detail::parse_column_names(options.column_names(), column_names_); - for (const auto& item : column_names_) - { - string_type str{alloc_}; - str.push_back('/'); - str.append(item.data(), item.size()); - column_paths_.emplace_back(str); - column_path_name_map_.emplace(std::move(str), item); - } - has_column_mapping_ = true; } } @@ -259,6 +250,23 @@ class basic_csv_encoder final : public basic_json_visitor stack_.emplace_back(stack_item_kind::column_mapping); return true; } + + // legacy + if (has_column_names_ && stack_.back().count_ == 0) + { + if (stack_.back().item_kind_ == stack_item_kind::flat_row_mapping || stack_.back().item_kind_ == stack_item_kind::row_mapping) + { + for (const auto& item : column_names_) + { + string_type str{alloc_}; + str.push_back('/'); + str.append(item.data(), item.size()); + column_paths_.emplace_back(str); + column_path_name_map_.emplace(std::move(str), item); + } + has_column_mapping_ = true; + } + } switch (stack_.back().item_kind_) { case stack_item_kind::flat_row_mapping: From ea24606562cfa878e597ec7718243f9071a3150f Mon Sep 17 00:00:00 2001 From: Daniel Parker Date: Wed, 22 Jan 2025 10:51:23 -0500 Subject: [PATCH 63/79] csv_encoder --- include/jsoncons_ext/csv/csv_encoder.hpp | 32 +++++------------------- 1 file changed, 6 insertions(+), 26 deletions(-) diff --git a/include/jsoncons_ext/csv/csv_encoder.hpp b/include/jsoncons_ext/csv/csv_encoder.hpp index bfc14a75cd..937a861900 100644 --- a/include/jsoncons_ext/csv/csv_encoder.hpp +++ b/include/jsoncons_ext/csv/csv_encoder.hpp @@ -316,10 +316,11 @@ class basic_csv_encoder final : public basic_json_visitor { if (stack_[0].count_ == 0) { - if (has_column_names_) + bool first = true; + for (std::size_t i = 0; i < column_paths_.size(); ++i) { - bool first = true; - for (const auto& item : column_names_) + auto it = column_path_name_map_.find(column_paths_[i]); + if (it != column_path_name_map_.end()) { if (!first) { @@ -329,31 +330,10 @@ class basic_csv_encoder final : public basic_json_visitor { first = false; } - sink_.append(item.data(), item.length()); - } - sink_.append(line_delimiter_.data(), line_delimiter_.length()); - } - else - { - bool first = true; - for (std::size_t i = 0; i < column_paths_.size(); ++i) - { - auto it = column_path_name_map_.find(column_paths_[i]); - if (it != column_path_name_map_.end()) - { - if (!first) - { - sink_.push_back(field_delimiter_); - } - else - { - first = false; - } - sink_.append(it->second.data(), it->second.length()); - } + sink_.append(it->second.data(), it->second.length()); } - sink_.append(line_delimiter_.data(), line_delimiter_.length()); } + sink_.append(line_delimiter_.data(), line_delimiter_.length()); } for (std::size_t i = 0; i < column_paths_.size(); ++i) { From 7b82d53b57b0d49d2fa06cfdb5593ae2924a53e9 Mon Sep 17 00:00:00 2001 From: Daniel Parker Date: Wed, 22 Jan 2025 11:19:28 -0500 Subject: [PATCH 64/79] csv_encoder --- include/jsoncons_ext/csv/csv_encoder.hpp | 9 ++++++++- test/csv/src/encode_decode_csv_tests.cpp | 6 +++--- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/include/jsoncons_ext/csv/csv_encoder.hpp b/include/jsoncons_ext/csv/csv_encoder.hpp index 937a861900..79993d7ddf 100644 --- a/include/jsoncons_ext/csv/csv_encoder.hpp +++ b/include/jsoncons_ext/csv/csv_encoder.hpp @@ -203,7 +203,14 @@ class basic_csv_encoder final : public basic_json_visitor void reset() { stack_.clear(); - column_names_.clear(); + if (!has_column_names_) + { + column_names_.clear(); + } + if (!has_column_mapping_) + { + column_path_name_map_.clear(); + } column_path_value_map_.clear(); column_index_ = 0; } diff --git a/test/csv/src/encode_decode_csv_tests.cpp b/test/csv/src/encode_decode_csv_tests.cpp index 1c9f356ed7..64c2ddc4a0 100644 --- a/test/csv/src/encode_decode_csv_tests.cpp +++ b/test/csv/src/encode_decode_csv_tests.cpp @@ -204,7 +204,7 @@ TEMPLATE_TEST_CASE("test_csv_encoder_reset", "", // Missing column and array end f.encoder.flush(); - CHECK(f.string1() == "h1,h2\n1"); + CHECK("h1,h2\n1" == f.string1()); f.encoder.reset(); f.encoder.begin_array(); f.encoder.begin_array(); @@ -217,7 +217,7 @@ TEMPLATE_TEST_CASE("test_csv_encoder_reset", "", f.encoder.end_array(); f.encoder.end_array(); f.encoder.flush(); - CHECK(f.string1() == "h1,h2\n1h3,h4\n3,4\n"); + CHECK("h1,h2\n1h3,h4\n3,4\n" == f.string1()); // Reset and encode to different sink f.encoder.reset(f.output2); @@ -232,7 +232,7 @@ TEMPLATE_TEST_CASE("test_csv_encoder_reset", "", f.encoder.end_array(); f.encoder.end_array(); f.encoder.flush(); - CHECK(f.string2() == "h5,h6\n5,6\n"); + CHECK("h5,h6\n5,6\n" == f.string2()); } namespace { namespace ns { From bfd9e6a20957306b147e0694c0902cfcf64f282a Mon Sep 17 00:00:00 2001 From: Daniel Parker Date: Wed, 22 Jan 2025 11:42:09 -0500 Subject: [PATCH 65/79] csv_encoder --- include/jsoncons_ext/csv/csv_encoder.hpp | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/include/jsoncons_ext/csv/csv_encoder.hpp b/include/jsoncons_ext/csv/csv_encoder.hpp index 79993d7ddf..c1de5ef966 100644 --- a/include/jsoncons_ext/csv/csv_encoder.hpp +++ b/include/jsoncons_ext/csv/csv_encoder.hpp @@ -125,6 +125,7 @@ class basic_csv_encoder final : public basic_json_visitor std::vector column_names_; std::vector column_paths_; + std::vector column_path_values_; std::unordered_map,std::equal_to,string_string_allocator_type> column_path_value_map_; std::unordered_map,std::equal_to,string_string_allocator_type> column_path_name_map_; column_path_column_map_type column_path_column_map_; @@ -509,9 +510,15 @@ class basic_csv_encoder final : public basic_json_visitor if (!has_column_mapping_) { column_paths_.emplace_back(stack_.back().column_path_); - column_path_value_map_[stack_.back().column_path_] = std::basic_string(); } } + auto it = std::find(column_paths_.begin(), column_paths_.end(), stack_.back().column_path_); + if (it != column_paths_.end()) + { + column_path_values_.resize(column_paths_.size()); + column_path_values_[it-column_paths_.begin()] = std::basic_string(); + } + value_buffer_.clear(); stack_.emplace_back(stack_item_kind::multivalued_field); } @@ -591,6 +598,7 @@ class basic_csv_encoder final : public basic_json_visitor break; case stack_item_kind::multivalued_field: { + column_path_value_map_[stack_[stack_.size()-2].column_path_] = std::basic_string(); auto it = column_path_value_map_.find(stack_[stack_.size()-2].column_path_); if (it != column_path_value_map_.end()) { From 3bd4e31b3836f9161f85775167c7bf12006ed026 Mon Sep 17 00:00:00 2001 From: Daniel Parker Date: Wed, 22 Jan 2025 13:59:29 -0500 Subject: [PATCH 66/79] csv_encoder --- include/jsoncons_ext/csv/csv_encoder.hpp | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/include/jsoncons_ext/csv/csv_encoder.hpp b/include/jsoncons_ext/csv/csv_encoder.hpp index c1de5ef966..574c5ecb60 100644 --- a/include/jsoncons_ext/csv/csv_encoder.hpp +++ b/include/jsoncons_ext/csv/csv_encoder.hpp @@ -512,12 +512,6 @@ class basic_csv_encoder final : public basic_json_visitor column_paths_.emplace_back(stack_.back().column_path_); } } - auto it = std::find(column_paths_.begin(), column_paths_.end(), stack_.back().column_path_); - if (it != column_paths_.end()) - { - column_path_values_.resize(column_paths_.size()); - column_path_values_[it-column_paths_.begin()] = std::basic_string(); - } value_buffer_.clear(); stack_.emplace_back(stack_item_kind::multivalued_field); @@ -598,12 +592,14 @@ class basic_csv_encoder final : public basic_json_visitor break; case stack_item_kind::multivalued_field: { - column_path_value_map_[stack_[stack_.size()-2].column_path_] = std::basic_string(); - auto it = column_path_value_map_.find(stack_[stack_.size()-2].column_path_); - if (it != column_path_value_map_.end()) + auto it1 = std::find(column_paths_.begin(), column_paths_.end(), stack_.back().column_path_); + if (it1 != column_paths_.end()) { - it->second.append(value_buffer_.data(),value_buffer_.length()); + column_path_values_.resize(column_paths_.size()); + column_path_values_[it1-column_paths_.begin()] = value_buffer_; } + + column_path_value_map_[stack_[stack_.size()-2].column_path_] = value_buffer_; break; } case stack_item_kind::row: From 6fc1a609aeefe92a9f1240969990cc476ec7cab8 Mon Sep 17 00:00:00 2001 From: Daniel Parker Date: Wed, 22 Jan 2025 14:46:50 -0500 Subject: [PATCH 67/79] csv_encoder --- include/jsoncons_ext/csv/csv_encoder.hpp | 34 +++++++++--------------- 1 file changed, 13 insertions(+), 21 deletions(-) diff --git a/include/jsoncons_ext/csv/csv_encoder.hpp b/include/jsoncons_ext/csv/csv_encoder.hpp index 574c5ecb60..85606a879d 100644 --- a/include/jsoncons_ext/csv/csv_encoder.hpp +++ b/include/jsoncons_ext/csv/csv_encoder.hpp @@ -210,8 +210,10 @@ class basic_csv_encoder final : public basic_json_visitor } if (!has_column_mapping_) { + column_paths_.clear(); column_path_name_map_.clear(); } + column_path_values_.clear(); column_path_value_map_.clear(); column_index_ = 0; } @@ -787,6 +789,7 @@ class basic_csv_encoder final : public basic_json_visitor default: break; } + ++stack_.back().count_; return true; } @@ -847,6 +850,7 @@ class basic_csv_encoder final : public basic_json_visitor default: break; } + ++stack_.back().count_; return true; } @@ -965,6 +969,7 @@ class basic_csv_encoder final : public basic_json_visitor default: break; } + ++stack_.back().count_; return true; } @@ -1028,6 +1033,7 @@ class basic_csv_encoder final : public basic_json_visitor default: break; } + ++stack_.back().count_; return true; } @@ -1091,6 +1097,7 @@ class basic_csv_encoder final : public basic_json_visitor default: break; } + ++stack_.back().count_; return true; } @@ -1151,11 +1158,15 @@ class basic_csv_encoder final : public basic_json_visitor default: break; } + ++stack_.back().count_; return true; } - bool do_string_value(const CharT* s, std::size_t length, string_type& str) + void write_string_value(const string_view_type& value, string_type& str) { + const char* s = value.data(); + const std::size_t length = value.length(); + bool quote = false; if (quote_style_ == quote_style_kind::all || quote_style_ == quote_style_kind::nonnumeric || (quote_style_ == quote_style_kind::minimal && @@ -1169,14 +1180,6 @@ class basic_csv_encoder final : public basic_json_visitor { str.push_back(quote_char_); } - - return true; - } - - void write_string_value(const string_view_type& value, string_type& str) - { - do_string_value(value.data(),value.length(), str); - ++stack_.back().count_; } void write_double_value(double val, const ser_context& context, string_type& str, std::error_code& ec) @@ -1233,23 +1236,16 @@ class basic_csv_encoder final : public basic_json_visitor { fp_(val, str); } - - ++stack_.back().count_; - } void write_int64_value(int64_t val, string_type& str) { jsoncons::detail::from_integer(val,str); - - ++stack_.back().count_; } void write_uint64_value(uint64_t val, string_type& str) { jsoncons::detail::from_integer(val,str); - - ++stack_.back().count_; } void write_bool_value(bool val, string_type& str) @@ -1262,15 +1258,11 @@ class basic_csv_encoder final : public basic_json_visitor { str.append(false_constant().data(), false_constant().size()); } - - ++stack_.back().count_; } - bool write_null_value(string_type& str) + void write_null_value(string_type& str) { str.append(null_constant().data(), null_constant().size()); - ++stack_.back().count_; - return true; } }; From 8ce0098ca2d6d312eadb269444de620c7609a785 Mon Sep 17 00:00:00 2001 From: Daniel Parker Date: Wed, 22 Jan 2025 15:05:54 -0500 Subject: [PATCH 68/79] csv_encoder --- include/jsoncons_ext/csv/csv_encoder.hpp | 5 ----- 1 file changed, 5 deletions(-) diff --git a/include/jsoncons_ext/csv/csv_encoder.hpp b/include/jsoncons_ext/csv/csv_encoder.hpp index 85606a879d..99bba3eeee 100644 --- a/include/jsoncons_ext/csv/csv_encoder.hpp +++ b/include/jsoncons_ext/csv/csv_encoder.hpp @@ -280,7 +280,6 @@ class basic_csv_encoder final : public basic_json_visitor switch (stack_.back().item_kind_) { case stack_item_kind::flat_row_mapping: - column_index_ = 0; stack_.emplace_back(stack_item_kind::flat_object); break; case stack_item_kind::row_mapping: @@ -492,10 +491,6 @@ class basic_csv_encoder final : public basic_json_visitor else { append_array_path_component(); - if (stack_[0].count_ == 0) - { - column_path_value_map_[stack_.back().column_path_] = std::basic_string(); - } value_buffer_.clear(); stack_.emplace_back(stack_item_kind::multivalued_field); } From 2a4c20bf9ca1fd8154bf5d8ce12fff0d2321fc08 Mon Sep 17 00:00:00 2001 From: Daniel Parker Date: Wed, 22 Jan 2025 15:29:13 -0500 Subject: [PATCH 69/79] csv_encoder --- include/jsoncons_ext/csv/csv_encoder.hpp | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/include/jsoncons_ext/csv/csv_encoder.hpp b/include/jsoncons_ext/csv/csv_encoder.hpp index 99bba3eeee..b8e7fd4e94 100644 --- a/include/jsoncons_ext/csv/csv_encoder.hpp +++ b/include/jsoncons_ext/csv/csv_encoder.hpp @@ -183,6 +183,7 @@ class basic_csv_encoder final : public basic_json_visitor column_paths_.emplace_back(item.first); column_path_name_map_.emplace(item.first, item.second); } + column_path_values_.resize(column_paths_.size()); } if (has_column_names_) { @@ -213,7 +214,6 @@ class basic_csv_encoder final : public basic_json_visitor column_paths_.clear(); column_path_name_map_.clear(); } - column_path_values_.clear(); column_path_value_map_.clear(); column_index_ = 0; } @@ -272,6 +272,7 @@ class basic_csv_encoder final : public basic_json_visitor str.push_back('/'); str.append(item.data(), item.size()); column_paths_.emplace_back(str); + column_path_values_.resize(column_paths_.size()); column_path_name_map_.emplace(std::move(str), item); } has_column_mapping_ = true; @@ -507,6 +508,7 @@ class basic_csv_encoder final : public basic_json_visitor if (!has_column_mapping_) { column_paths_.emplace_back(stack_.back().column_path_); + column_path_values_.resize(column_paths_.size()); } } @@ -589,12 +591,11 @@ class basic_csv_encoder final : public basic_json_visitor break; case stack_item_kind::multivalued_field: { - auto it1 = std::find(column_paths_.begin(), column_paths_.end(), stack_.back().column_path_); + /*auto it1 = std::find(column_paths_.begin(), column_paths_.end(), stack_.back().column_path_); if (it1 != column_paths_.end()) { - column_path_values_.resize(column_paths_.size()); column_path_values_[it1-column_paths_.begin()] = value_buffer_; - } + }*/ column_path_value_map_[stack_[stack_.size()-2].column_path_] = value_buffer_; break; @@ -719,6 +720,7 @@ class basic_csv_encoder final : public basic_json_visitor if (!has_column_mapping_) { column_paths_.emplace_back(stack_.back().column_path_); + column_path_values_.resize(column_paths_.size()); if (stack_.back().item_kind_ == stack_item_kind::row) { column_path_name_map_.emplace(stack_.back().column_path_, stack_.back().column_path_); @@ -740,6 +742,7 @@ class basic_csv_encoder final : public basic_json_visitor if (!has_column_mapping_) { column_paths_.emplace_back(stack_.back().column_path_); + column_path_values_.resize(column_paths_.size()); } column_path_value_map_[stack_.back().column_path_] = std::basic_string(); } @@ -801,6 +804,7 @@ class basic_csv_encoder final : public basic_json_visitor if (!has_column_mapping_) { column_paths_.emplace_back(stack_.back().column_path_); + column_path_values_.resize(column_paths_.size()); } } @@ -920,6 +924,7 @@ class basic_csv_encoder final : public basic_json_visitor if (!has_column_mapping_) { column_paths_.emplace_back(stack_.back().column_path_); + column_path_values_.resize(column_paths_.size()); } column_path_value_map_[stack_.back().column_path_] = std::basic_string(); } @@ -984,6 +989,7 @@ class basic_csv_encoder final : public basic_json_visitor if (!has_column_mapping_) { column_paths_.emplace_back(stack_.back().column_path_); + column_path_values_.resize(column_paths_.size()); } column_path_value_map_[stack_.back().column_path_] = std::basic_string(); } @@ -1048,6 +1054,7 @@ class basic_csv_encoder final : public basic_json_visitor if (!has_column_mapping_) { column_paths_.emplace_back(stack_.back().column_path_); + column_path_values_.resize(column_paths_.size()); } column_path_value_map_[stack_.back().column_path_] = std::basic_string(); } @@ -1109,6 +1116,7 @@ class basic_csv_encoder final : public basic_json_visitor if (!has_column_mapping_) { column_paths_.emplace_back(stack_.back().column_path_); + column_path_values_.resize(column_paths_.size()); } column_path_value_map_[stack_.back().column_path_] = std::basic_string(); } From 7957ed2c584808385fd2783064ca431b6d1959bc Mon Sep 17 00:00:00 2001 From: Daniel Parker Date: Wed, 22 Jan 2025 16:13:28 -0500 Subject: [PATCH 70/79] csv_encoder --- include/jsoncons_ext/csv/csv_encoder.hpp | 164 ++++++++++------------- 1 file changed, 73 insertions(+), 91 deletions(-) diff --git a/include/jsoncons_ext/csv/csv_encoder.hpp b/include/jsoncons_ext/csv/csv_encoder.hpp index b8e7fd4e94..69801cff5a 100644 --- a/include/jsoncons_ext/csv/csv_encoder.hpp +++ b/include/jsoncons_ext/csv/csv_encoder.hpp @@ -126,7 +126,6 @@ class basic_csv_encoder final : public basic_json_visitor std::vector column_names_; std::vector column_paths_; std::vector column_path_values_; - std::unordered_map,std::equal_to,string_string_allocator_type> column_path_value_map_; std::unordered_map,std::equal_to,string_string_allocator_type> column_path_name_map_; column_path_column_map_type column_path_column_map_; @@ -205,16 +204,11 @@ class basic_csv_encoder final : public basic_json_visitor void reset() { stack_.clear(); - if (!has_column_names_) - { - column_names_.clear(); - } if (!has_column_mapping_) { column_paths_.clear(); - column_path_name_map_.clear(); + column_path_values_.clear(); } - column_path_value_map_.clear(); column_index_ = 0; } @@ -351,11 +345,13 @@ class basic_csv_encoder final : public basic_json_visitor { sink_.push_back(field_delimiter_); } - auto it = column_path_value_map_.find(column_paths_[i]); - if (it != column_path_value_map_.end()) + auto it = std::find(column_paths_.begin(), column_paths_.end(), column_paths_[i]); + if (it != column_paths_.end()) { - sink_.append(it->second.data(),it->second.length()); - it->second.clear(); + JSONCONS_ASSERT(column_paths_.size() == column_path_values_.size()); + auto& value = column_path_values_[it - column_paths_.begin()]; + sink_.append(value.data(), value.length()); + value.clear(); } } sink_.append(line_delimiter_.data(), line_delimiter_.length()); @@ -579,11 +575,13 @@ class basic_csv_encoder final : public basic_json_visitor { sink_.push_back(field_delimiter_); } - auto it = column_path_value_map_.find(column_paths_[i]); - if (it != column_path_value_map_.end()) + auto it = std::find(column_paths_.begin(), column_paths_.end(), column_paths_[i]); + if (it != column_paths_.end()) { - sink_.append(it->second.data(),it->second.length()); - it->second.clear(); + JSONCONS_ASSERT(column_paths_.size() == column_path_values_.size()); + auto& value = column_path_values_[it - column_paths_.begin()]; + sink_.append(value.data(), value.length()); + value.clear(); } } sink_.append(line_delimiter_.data(), line_delimiter_.length()); @@ -591,13 +589,12 @@ class basic_csv_encoder final : public basic_json_visitor break; case stack_item_kind::multivalued_field: { - /*auto it1 = std::find(column_paths_.begin(), column_paths_.end(), stack_.back().column_path_); - if (it1 != column_paths_.end()) + auto it = std::find(column_paths_.begin(), column_paths_.end(), stack_[stack_.size() - 2].column_path_); + if (it != column_paths_.end()) { - column_path_values_[it1-column_paths_.begin()] = value_buffer_; - }*/ - - column_path_value_map_[stack_[stack_.size()-2].column_path_] = value_buffer_; + JSONCONS_ASSERT(column_paths_.size() == column_path_values_.size()); + column_path_values_[it - column_paths_.begin()] = value_buffer_; + } break; } case stack_item_kind::row: @@ -629,11 +626,13 @@ class basic_csv_encoder final : public basic_json_visitor { sink_.push_back(field_delimiter_); } - auto it = column_path_value_map_.find(column_paths_[i]); - if (it != column_path_value_map_.end()) + auto it = std::find(column_paths_.begin(), column_paths_.end(), column_paths_[i]); + if (it != column_paths_.end()) { - sink_.append(it->second.data(),it->second.length()); - it->second.clear(); + JSONCONS_ASSERT(column_paths_.size() == column_path_values_.size()); + auto& value = column_path_values_[it - column_paths_.begin()]; + sink_.append(value.data(), value.length()); + value.clear(); } } sink_.append(line_delimiter_.data(), line_delimiter_.length()); @@ -744,12 +743,12 @@ class basic_csv_encoder final : public basic_json_visitor column_paths_.emplace_back(stack_.back().column_path_); column_path_values_.resize(column_paths_.size()); } - column_path_value_map_[stack_.back().column_path_] = std::basic_string(); } - if (column_path_name_map_.find(stack_.back().column_path_) != column_path_name_map_.end()) + auto it = std::find(column_paths_.begin(), column_paths_.end(), stack_.back().column_path_); + if (it != column_paths_.end()) { - column_path_value_map_[stack_.back().column_path_] = std::basic_string(); - write_null_value(column_path_value_map_[stack_.back().column_path_]); + JSONCONS_ASSERT(column_paths_.size() == column_path_values_.size()); + write_null_value(column_path_values_[it - column_paths_.begin()]); } break; } @@ -757,14 +756,11 @@ class basic_csv_encoder final : public basic_json_visitor case stack_item_kind::row: { append_array_path_component(); - if (stack_[0].count_ == 0) - { - column_path_value_map_[stack_.back().column_path_] = std::basic_string(); - } - auto it = column_path_value_map_.find(stack_.back().column_path_); - if (it != column_path_value_map_.end()) + auto it = std::find(column_paths_.begin(), column_paths_.end(), stack_.back().column_path_); + if (it != column_paths_.end()) { - write_null_value(it->second); + JSONCONS_ASSERT(column_paths_.size() == column_path_values_.size()); + write_null_value(column_path_values_[it - column_paths_.begin()]); } break; } @@ -807,11 +803,12 @@ class basic_csv_encoder final : public basic_json_visitor column_path_values_.resize(column_paths_.size()); } } - - if (column_path_name_map_.find(stack_.back().column_path_) != column_path_name_map_.end()) + + auto it = std::find(column_paths_.begin(), column_paths_.end(), stack_.back().column_path_); + if (it != column_paths_.end()) { - column_path_value_map_[stack_.back().column_path_] = std::basic_string(); - write_string_value(sv, column_path_value_map_[stack_.back().column_path_]); + JSONCONS_ASSERT(column_paths_.size() == column_path_values_.size()); + write_string_value(sv, column_path_values_[it - column_paths_.begin()]); } break; } @@ -819,14 +816,11 @@ class basic_csv_encoder final : public basic_json_visitor case stack_item_kind::row: { append_array_path_component(); - if (stack_[0].count_ == 0) + auto it = std::find(column_paths_.begin(), column_paths_.end(), stack_.back().column_path_); + if (it != column_paths_.end()) { - column_path_value_map_[stack_.back().column_path_] = std::basic_string(); - } - auto it = column_path_value_map_.find(stack_.back().column_path_); - if (it != column_path_value_map_.end()) - { - write_string_value(sv, it->second); + JSONCONS_ASSERT(column_paths_.size() == column_path_values_.size()); + write_string_value(sv, column_path_values_[it - column_paths_.begin()]); } break; } @@ -926,12 +920,12 @@ class basic_csv_encoder final : public basic_json_visitor column_paths_.emplace_back(stack_.back().column_path_); column_path_values_.resize(column_paths_.size()); } - column_path_value_map_[stack_.back().column_path_] = std::basic_string(); } - if (column_path_name_map_.find(stack_.back().column_path_) != column_path_name_map_.end()) + auto it = std::find(column_paths_.begin(), column_paths_.end(), stack_.back().column_path_); + if (it != column_paths_.end()) { - column_path_value_map_[stack_.back().column_path_] = std::basic_string(); - write_double_value(val, context, column_path_value_map_[stack_.back().column_path_], ec); + JSONCONS_ASSERT(column_paths_.size() == column_path_values_.size()); + write_double_value(val, context, column_path_values_[it - column_paths_.begin()], ec); } break; } @@ -939,14 +933,11 @@ class basic_csv_encoder final : public basic_json_visitor case stack_item_kind::row: { append_array_path_component(); - if (stack_[0].count_ == 0) + auto it = std::find(column_paths_.begin(), column_paths_.end(), stack_.back().column_path_); + if (it != column_paths_.end()) { - column_path_value_map_[stack_.back().column_path_] = std::basic_string(); - } - auto it = column_path_value_map_.find(stack_.back().column_path_); - if (it != column_path_value_map_.end()) - { - write_double_value(val, context, it->second, ec); + JSONCONS_ASSERT(column_paths_.size() == column_path_values_.size()); + write_double_value(val, context, column_path_values_[it - column_paths_.begin()], ec); } break; } @@ -991,12 +982,12 @@ class basic_csv_encoder final : public basic_json_visitor column_paths_.emplace_back(stack_.back().column_path_); column_path_values_.resize(column_paths_.size()); } - column_path_value_map_[stack_.back().column_path_] = std::basic_string(); } - if (column_path_name_map_.find(stack_.back().column_path_) != column_path_name_map_.end()) + auto it = std::find(column_paths_.begin(), column_paths_.end(), stack_.back().column_path_); + if (it != column_paths_.end()) { - column_path_value_map_[stack_.back().column_path_] = std::basic_string(); - write_int64_value(val, column_path_value_map_[stack_.back().column_path_]); + JSONCONS_ASSERT(column_paths_.size() == column_path_values_.size()); + write_int64_value(val, column_path_values_[it - column_paths_.begin()]); } break; } @@ -1004,14 +995,11 @@ class basic_csv_encoder final : public basic_json_visitor case stack_item_kind::row: { append_array_path_component(); - if (stack_[0].count_ == 0) + auto it = std::find(column_paths_.begin(), column_paths_.end(), stack_.back().column_path_); + if (it != column_paths_.end()) { - column_path_value_map_[stack_.back().column_path_] = std::basic_string(); - } - auto it = column_path_value_map_.find(stack_.back().column_path_); - if (it != column_path_value_map_.end()) - { - write_int64_value(val, it->second); + JSONCONS_ASSERT(column_paths_.size() == column_path_values_.size()); + write_int64_value(val, column_path_values_[it - column_paths_.begin()]); } break; } @@ -1056,12 +1044,12 @@ class basic_csv_encoder final : public basic_json_visitor column_paths_.emplace_back(stack_.back().column_path_); column_path_values_.resize(column_paths_.size()); } - column_path_value_map_[stack_.back().column_path_] = std::basic_string(); } - if (column_path_name_map_.find(stack_.back().column_path_) != column_path_name_map_.end()) + auto it = std::find(column_paths_.begin(), column_paths_.end(), stack_.back().column_path_); + if (it != column_paths_.end()) { - column_path_value_map_[stack_.back().column_path_] = std::basic_string(); - write_uint64_value(val, column_path_value_map_[stack_.back().column_path_]); + JSONCONS_ASSERT(column_paths_.size() == column_path_values_.size()); + write_uint64_value(val, column_path_values_[it - column_paths_.begin()]); } break; } @@ -1069,14 +1057,11 @@ class basic_csv_encoder final : public basic_json_visitor case stack_item_kind::row: { append_array_path_component(); - if (stack_[0].count_ == 0) + auto it = std::find(column_paths_.begin(), column_paths_.end(), stack_.back().column_path_); + if (it != column_paths_.end()) { - column_path_value_map_[stack_.back().column_path_] = std::basic_string(); - } - auto it = column_path_value_map_.find(stack_.back().column_path_); - if (it != column_path_value_map_.end()) - { - write_uint64_value(val, it->second); + JSONCONS_ASSERT(column_paths_.size() == column_path_values_.size()); + write_uint64_value(val, column_path_values_[it - column_paths_.begin()]); } break; } @@ -1118,12 +1103,12 @@ class basic_csv_encoder final : public basic_json_visitor column_paths_.emplace_back(stack_.back().column_path_); column_path_values_.resize(column_paths_.size()); } - column_path_value_map_[stack_.back().column_path_] = std::basic_string(); } - if (column_path_name_map_.find(stack_.back().column_path_) != column_path_name_map_.end()) + auto it = std::find(column_paths_.begin(), column_paths_.end(), stack_.back().column_path_); + if (it != column_paths_.end()) { - column_path_value_map_[stack_.back().column_path_] = std::basic_string(); - write_bool_value(val, column_path_value_map_[stack_.back().column_path_]); + JSONCONS_ASSERT(column_paths_.size() == column_path_values_.size()); + write_bool_value(val, column_path_values_[it - column_paths_.begin()]); } break; } @@ -1131,14 +1116,11 @@ class basic_csv_encoder final : public basic_json_visitor case stack_item_kind::row: { append_array_path_component(); - if (stack_[0].count_ == 0) - { - column_path_value_map_[stack_.back().column_path_] = std::basic_string(); - } - auto it = column_path_value_map_.find(stack_.back().column_path_); - if (it != column_path_value_map_.end()) + auto it = std::find(column_paths_.begin(), column_paths_.end(), stack_.back().column_path_); + if (it != column_paths_.end()) { - write_bool_value(val, it->second); + JSONCONS_ASSERT(column_paths_.size() == column_path_values_.size()); + write_bool_value(val, column_path_values_[it - column_paths_.begin()]); } break; } From 34faf40b29dcba375c4fffa5c4b92156ec3dffe9 Mon Sep 17 00:00:00 2001 From: Daniel Parker Date: Wed, 22 Jan 2025 18:19:05 -0500 Subject: [PATCH 71/79] csv_encoder --- include/jsoncons_ext/csv/csv_encoder.hpp | 145 ++++++++++------------- 1 file changed, 63 insertions(+), 82 deletions(-) diff --git a/include/jsoncons_ext/csv/csv_encoder.hpp b/include/jsoncons_ext/csv/csv_encoder.hpp index 69801cff5a..7226c95fc8 100644 --- a/include/jsoncons_ext/csv/csv_encoder.hpp +++ b/include/jsoncons_ext/csv/csv_encoder.hpp @@ -125,8 +125,8 @@ class basic_csv_encoder final : public basic_json_visitor std::vector column_names_; std::vector column_paths_; - std::vector column_path_values_; std::unordered_map,std::equal_to,string_string_allocator_type> column_path_name_map_; + std::unordered_map,std::equal_to,string_string_allocator_type> column_path_value_map_; column_path_column_map_type column_path_column_map_; std::size_t column_index_{0}; @@ -181,8 +181,8 @@ class basic_csv_encoder final : public basic_json_visitor { column_paths_.emplace_back(item.first); column_path_name_map_.emplace(item.first, item.second); + column_path_value_map_.emplace(item.first, string_type{alloc_}); } - column_path_values_.resize(column_paths_.size()); } if (has_column_names_) { @@ -207,7 +207,7 @@ class basic_csv_encoder final : public basic_json_visitor if (!has_column_mapping_) { column_paths_.clear(); - column_path_values_.clear(); + column_path_value_map_.clear(); } column_index_ = 0; } @@ -266,7 +266,7 @@ class basic_csv_encoder final : public basic_json_visitor str.push_back('/'); str.append(item.data(), item.size()); column_paths_.emplace_back(str); - column_path_values_.resize(column_paths_.size()); + column_path_value_map_.emplace(str, string_type{alloc_}); column_path_name_map_.emplace(std::move(str), item); } has_column_mapping_ = true; @@ -345,13 +345,11 @@ class basic_csv_encoder final : public basic_json_visitor { sink_.push_back(field_delimiter_); } - auto it = std::find(column_paths_.begin(), column_paths_.end(), column_paths_[i]); - if (it != column_paths_.end()) + auto it = column_path_value_map_.find(column_paths_[i]); + if (it != column_path_value_map_.end()) { - JSONCONS_ASSERT(column_paths_.size() == column_path_values_.size()); - auto& value = column_path_values_[it - column_paths_.begin()]; - sink_.append(value.data(), value.length()); - value.clear(); + sink_.append(it->second.data(), it->second.length()); + it->second.clear(); } } sink_.append(line_delimiter_.data(), line_delimiter_.length()); @@ -504,7 +502,7 @@ class basic_csv_encoder final : public basic_json_visitor if (!has_column_mapping_) { column_paths_.emplace_back(stack_.back().column_path_); - column_path_values_.resize(column_paths_.size()); + column_path_value_map_.emplace(stack_.back().column_path_, string_type{alloc_}); } } @@ -575,13 +573,11 @@ class basic_csv_encoder final : public basic_json_visitor { sink_.push_back(field_delimiter_); } - auto it = std::find(column_paths_.begin(), column_paths_.end(), column_paths_[i]); - if (it != column_paths_.end()) + auto it = column_path_value_map_.find(column_paths_[i]); + if (it != column_path_value_map_.end()) { - JSONCONS_ASSERT(column_paths_.size() == column_path_values_.size()); - auto& value = column_path_values_[it - column_paths_.begin()]; - sink_.append(value.data(), value.length()); - value.clear(); + sink_.append(it->second.data(), it->second.length()); + it->second.clear(); } } sink_.append(line_delimiter_.data(), line_delimiter_.length()); @@ -589,11 +585,10 @@ class basic_csv_encoder final : public basic_json_visitor break; case stack_item_kind::multivalued_field: { - auto it = std::find(column_paths_.begin(), column_paths_.end(), stack_[stack_.size() - 2].column_path_); - if (it != column_paths_.end()) + auto it = column_path_value_map_.find(stack_[stack_.size() - 2].column_path_); + if (it != column_path_value_map_.end()) { - JSONCONS_ASSERT(column_paths_.size() == column_path_values_.size()); - column_path_values_[it - column_paths_.begin()] = value_buffer_; + it->second = value_buffer_; } break; } @@ -626,13 +621,11 @@ class basic_csv_encoder final : public basic_json_visitor { sink_.push_back(field_delimiter_); } - auto it = std::find(column_paths_.begin(), column_paths_.end(), column_paths_[i]); - if (it != column_paths_.end()) + auto it = column_path_value_map_.find(column_paths_[i]); + if (it != column_path_value_map_.end()) { - JSONCONS_ASSERT(column_paths_.size() == column_path_values_.size()); - auto& value = column_path_values_[it - column_paths_.begin()]; - sink_.append(value.data(), value.length()); - value.clear(); + sink_.append(it->second.data(), it->second.length()); + it->second.clear(); } } sink_.append(line_delimiter_.data(), line_delimiter_.length()); @@ -719,7 +712,7 @@ class basic_csv_encoder final : public basic_json_visitor if (!has_column_mapping_) { column_paths_.emplace_back(stack_.back().column_path_); - column_path_values_.resize(column_paths_.size()); + column_path_value_map_.emplace(stack_.back().column_path_, string_type{alloc_}); if (stack_.back().item_kind_ == stack_item_kind::row) { column_path_name_map_.emplace(stack_.back().column_path_, stack_.back().column_path_); @@ -741,14 +734,13 @@ class basic_csv_encoder final : public basic_json_visitor if (!has_column_mapping_) { column_paths_.emplace_back(stack_.back().column_path_); - column_path_values_.resize(column_paths_.size()); + column_path_value_map_.emplace(stack_.back().column_path_, string_type{alloc_}); } } - auto it = std::find(column_paths_.begin(), column_paths_.end(), stack_.back().column_path_); - if (it != column_paths_.end()) + auto it = column_path_value_map_.find(stack_.back().column_path_); + if (it != column_path_value_map_.end()) { - JSONCONS_ASSERT(column_paths_.size() == column_path_values_.size()); - write_null_value(column_path_values_[it - column_paths_.begin()]); + write_null_value(it->second); } break; } @@ -756,11 +748,10 @@ class basic_csv_encoder final : public basic_json_visitor case stack_item_kind::row: { append_array_path_component(); - auto it = std::find(column_paths_.begin(), column_paths_.end(), stack_.back().column_path_); - if (it != column_paths_.end()) + auto it = column_path_value_map_.find(stack_.back().column_path_); + if (it != column_path_value_map_.end()) { - JSONCONS_ASSERT(column_paths_.size() == column_path_values_.size()); - write_null_value(column_path_values_[it - column_paths_.begin()]); + write_null_value(it->second); } break; } @@ -800,15 +791,14 @@ class basic_csv_encoder final : public basic_json_visitor if (!has_column_mapping_) { column_paths_.emplace_back(stack_.back().column_path_); - column_path_values_.resize(column_paths_.size()); + column_path_value_map_.emplace(stack_.back().column_path_, string_type{alloc_}); } } - auto it = std::find(column_paths_.begin(), column_paths_.end(), stack_.back().column_path_); - if (it != column_paths_.end()) + auto it = column_path_value_map_.find(stack_.back().column_path_); + if (it != column_path_value_map_.end()) { - JSONCONS_ASSERT(column_paths_.size() == column_path_values_.size()); - write_string_value(sv, column_path_values_[it - column_paths_.begin()]); + write_string_value(sv, it->second); } break; } @@ -816,11 +806,10 @@ class basic_csv_encoder final : public basic_json_visitor case stack_item_kind::row: { append_array_path_component(); - auto it = std::find(column_paths_.begin(), column_paths_.end(), stack_.back().column_path_); - if (it != column_paths_.end()) + auto it = column_path_value_map_.find(stack_.back().column_path_); + if (it != column_path_value_map_.end()) { - JSONCONS_ASSERT(column_paths_.size() == column_path_values_.size()); - write_string_value(sv, column_path_values_[it - column_paths_.begin()]); + write_string_value(sv, it->second); } break; } @@ -918,14 +907,13 @@ class basic_csv_encoder final : public basic_json_visitor if (!has_column_mapping_) { column_paths_.emplace_back(stack_.back().column_path_); - column_path_values_.resize(column_paths_.size()); + column_path_value_map_.emplace(stack_.back().column_path_, string_type{alloc_}); } } - auto it = std::find(column_paths_.begin(), column_paths_.end(), stack_.back().column_path_); - if (it != column_paths_.end()) + auto it = column_path_value_map_.find(stack_.back().column_path_); + if (it != column_path_value_map_.end()) { - JSONCONS_ASSERT(column_paths_.size() == column_path_values_.size()); - write_double_value(val, context, column_path_values_[it - column_paths_.begin()], ec); + write_double_value(val, context, it->second, ec); } break; } @@ -933,11 +921,10 @@ class basic_csv_encoder final : public basic_json_visitor case stack_item_kind::row: { append_array_path_component(); - auto it = std::find(column_paths_.begin(), column_paths_.end(), stack_.back().column_path_); - if (it != column_paths_.end()) + auto it = column_path_value_map_.find(stack_.back().column_path_); + if (it != column_path_value_map_.end()) { - JSONCONS_ASSERT(column_paths_.size() == column_path_values_.size()); - write_double_value(val, context, column_path_values_[it - column_paths_.begin()], ec); + write_double_value(val, context, it->second, ec); } break; } @@ -980,14 +967,13 @@ class basic_csv_encoder final : public basic_json_visitor if (!has_column_mapping_) { column_paths_.emplace_back(stack_.back().column_path_); - column_path_values_.resize(column_paths_.size()); + column_path_value_map_.emplace(stack_.back().column_path_, string_type{alloc_}); } } - auto it = std::find(column_paths_.begin(), column_paths_.end(), stack_.back().column_path_); - if (it != column_paths_.end()) + auto it = column_path_value_map_.find(stack_.back().column_path_); + if (it != column_path_value_map_.end()) { - JSONCONS_ASSERT(column_paths_.size() == column_path_values_.size()); - write_int64_value(val, column_path_values_[it - column_paths_.begin()]); + write_int64_value(val, it->second); } break; } @@ -995,11 +981,10 @@ class basic_csv_encoder final : public basic_json_visitor case stack_item_kind::row: { append_array_path_component(); - auto it = std::find(column_paths_.begin(), column_paths_.end(), stack_.back().column_path_); - if (it != column_paths_.end()) + auto it = column_path_value_map_.find(stack_.back().column_path_); + if (it != column_path_value_map_.end()) { - JSONCONS_ASSERT(column_paths_.size() == column_path_values_.size()); - write_int64_value(val, column_path_values_[it - column_paths_.begin()]); + write_int64_value(val, it->second); } break; } @@ -1042,14 +1027,13 @@ class basic_csv_encoder final : public basic_json_visitor if (!has_column_mapping_) { column_paths_.emplace_back(stack_.back().column_path_); - column_path_values_.resize(column_paths_.size()); + column_path_value_map_.emplace(stack_.back().column_path_, string_type{alloc_}); } } - auto it = std::find(column_paths_.begin(), column_paths_.end(), stack_.back().column_path_); - if (it != column_paths_.end()) + auto it = column_path_value_map_.find(stack_.back().column_path_); + if (it != column_path_value_map_.end()) { - JSONCONS_ASSERT(column_paths_.size() == column_path_values_.size()); - write_uint64_value(val, column_path_values_[it - column_paths_.begin()]); + write_uint64_value(val, it->second); } break; } @@ -1057,11 +1041,10 @@ class basic_csv_encoder final : public basic_json_visitor case stack_item_kind::row: { append_array_path_component(); - auto it = std::find(column_paths_.begin(), column_paths_.end(), stack_.back().column_path_); - if (it != column_paths_.end()) + auto it = column_path_value_map_.find(stack_.back().column_path_); + if (it != column_path_value_map_.end()) { - JSONCONS_ASSERT(column_paths_.size() == column_path_values_.size()); - write_uint64_value(val, column_path_values_[it - column_paths_.begin()]); + write_uint64_value(val, it->second); } break; } @@ -1101,14 +1084,13 @@ class basic_csv_encoder final : public basic_json_visitor if (!has_column_mapping_) { column_paths_.emplace_back(stack_.back().column_path_); - column_path_values_.resize(column_paths_.size()); + column_path_value_map_.emplace(stack_.back().column_path_, string_type{alloc_}); } } - auto it = std::find(column_paths_.begin(), column_paths_.end(), stack_.back().column_path_); - if (it != column_paths_.end()) + auto it = column_path_value_map_.find(stack_.back().column_path_); + if (it != column_path_value_map_.end()) { - JSONCONS_ASSERT(column_paths_.size() == column_path_values_.size()); - write_bool_value(val, column_path_values_[it - column_paths_.begin()]); + write_bool_value(val, it->second); } break; } @@ -1116,11 +1098,10 @@ class basic_csv_encoder final : public basic_json_visitor case stack_item_kind::row: { append_array_path_component(); - auto it = std::find(column_paths_.begin(), column_paths_.end(), stack_.back().column_path_); - if (it != column_paths_.end()) + auto it = column_path_value_map_.find(stack_.back().column_path_); + if (it != column_path_value_map_.end()) { - JSONCONS_ASSERT(column_paths_.size() == column_path_values_.size()); - write_bool_value(val, column_path_values_[it - column_paths_.begin()]); + write_bool_value(val, it->second); } break; } From 909db9d16cd807be66a9a7522f798ebb5afcdebd Mon Sep 17 00:00:00 2001 From: Daniel Parker Date: Wed, 22 Jan 2025 19:42:30 -0500 Subject: [PATCH 72/79] csv_encoder --- include/jsoncons_ext/csv/csv_encoder.hpp | 21 ++++++++ test/csv/src/csv_encoder_tests.cpp | 68 +++++++++++++++++++++--- 2 files changed, 81 insertions(+), 8 deletions(-) diff --git a/include/jsoncons_ext/csv/csv_encoder.hpp b/include/jsoncons_ext/csv/csv_encoder.hpp index 7226c95fc8..399a0ee762 100644 --- a/include/jsoncons_ext/csv/csv_encoder.hpp +++ b/include/jsoncons_ext/csv/csv_encoder.hpp @@ -463,6 +463,27 @@ class basic_csv_encoder final : public basic_json_visitor } return true; } + // legacy + if (has_column_names_ && stack_.back().count_ == 0) + { + if (stack_.back().item_kind_ == stack_item_kind::flat_row_mapping || stack_.back().item_kind_ == stack_item_kind::row_mapping) + { + std::size_t index = 0; + for (const auto& item : column_names_) + { + string_type str{alloc_}; + str.push_back('/'); + buffer_.clear(); + jsoncons::detail::from_integer(index, buffer_); + str.append(buffer_.data(), buffer_.size()); + column_paths_.emplace_back(str); + column_path_value_map_.emplace(str, string_type{alloc_}); + column_path_name_map_.emplace(std::move(str), item); + ++index; + } + has_column_mapping_ = true; + } + } switch (stack_.back().item_kind_) { diff --git a/test/csv/src/csv_encoder_tests.cpp b/test/csv/src/csv_encoder_tests.cpp index bab1f9515d..caf4907b47 100644 --- a/test/csv/src/csv_encoder_tests.cpp +++ b/test/csv/src/csv_encoder_tests.cpp @@ -9,7 +9,7 @@ namespace csv = jsoncons::csv; TEST_CASE("test json to flat csv with column mappings") { -//#if 0 +#if 0 SECTION("array of objects to csv") { std::string expected = R"(Number,Date Time @@ -122,12 +122,12 @@ TEST_CASE("test json to flat csv with column mappings") CHECK(expected == buf); } -//#endif +#endif } TEST_CASE("test json to flat csv") { -//#if 0 +#if 0 SECTION("array of objects to csv") { std::string expected = R"(boolean,datetime,float,text @@ -405,12 +405,12 @@ NY,LON,TOR;LON //std::cout << buf << "\n"; CHECK(expected == buf); } -//#endif +#endif } TEST_CASE("test json to non-flat csv with column mappings") { -//#if 0 +#if 0 SECTION("array of objects to csv") { std::string expected = R"(Number,Date Time @@ -523,12 +523,12 @@ TEST_CASE("test json to non-flat csv with column mappings") CHECK(expected == buf); } -//#endif +#endif } TEST_CASE("test json to non-flat csv") { -//#if 0 +#if 0 SECTION("array of objects to csv") { std::string expected = R"(/boolean,/datetime,/float,/nested/nested/date,/nested/nested/integer,/nested/time,/text @@ -719,12 +719,13 @@ NY,LON,TOR;LON CHECK(expected == buf); } -//#endif +#endif } TEST_CASE("test json to flat csv with column names") { +#if 0 SECTION("array of objects to csv") { std::string expected = R"(boolean,datetime,float,text @@ -805,5 +806,56 @@ true,1948-01-01T14:57:13,1.27,Chicago Sun-Times CHECK(expected == buf); } +#endif + SECTION("array of arrays to csv with column_names") + { + std::string expected = R"(text,float +Chicago Reader,1.0 +Chicago Sun-Times,1.27 +)"; + + std::string jtext = R"( +[ + [ + "Chicago Reader", + 1.0, + "1971-01-01T04:14:00", + true, + [ + "04:14:00", + [ + "1971-01-01", + 40 + ] + ] + ], + [ + "Chicago Sun-Times", + 1.27, + "1948-01-01T14:57:13", + true, + [ + "14:57:13", + [ + "1948-01-01", + 63 + ] + ] + ] +] + )"; + + auto j = jsoncons::json::parse(jtext); + //std::cout << pretty_print(j) << "\n"; + + auto options = csv::csv_options{} + .flat(true) + .column_names("text,float"); + + std::string buf; + csv::csv_string_encoder encoder(buf, options); + j.dump(encoder); + CHECK(expected == buf); + } } From 0d12854a0216d4efdd91f082c7a82b67222a468e Mon Sep 17 00:00:00 2001 From: Daniel Parker Date: Wed, 22 Jan 2025 22:29:18 -0500 Subject: [PATCH 73/79] csv_encoder --- include/jsoncons_ext/csv/csv_encoder.hpp | 33 ++++++++++++++++++++---- test/csv/src/csv_encoder_tests.cpp | 33 +++++++++++++++++++++++- 2 files changed, 60 insertions(+), 6 deletions(-) diff --git a/include/jsoncons_ext/csv/csv_encoder.hpp b/include/jsoncons_ext/csv/csv_encoder.hpp index 399a0ee762..298b943530 100644 --- a/include/jsoncons_ext/csv/csv_encoder.hpp +++ b/include/jsoncons_ext/csv/csv_encoder.hpp @@ -252,6 +252,19 @@ class basic_csv_encoder final : public basic_json_visitor if (stack_.empty()) { stack_.emplace_back(stack_item_kind::column_mapping); + if (has_column_names_) + { + for (const auto& item : column_names_) + { + string_type str{alloc_}; + str.push_back('/'); + str.append(item.data(), item.size()); + column_paths_.emplace_back(str); + column_path_value_map_.emplace(str, string_type{alloc_}); + column_path_name_map_.emplace(std::move(str), item); + } + has_column_mapping_ = true; + } return true; } @@ -393,24 +406,34 @@ class basic_csv_encoder final : public basic_json_visitor while (!done) { std::size_t missing_cols = 0; - + std::size_t new_missing_cols = 0; bool first = true; - for (auto& item : columns) + for (std::size_t i = 0; i < no_cols; ++i) { + auto& item = columns[i]; if (item.first == item.second) - { + { ++missing_cols; + ++new_missing_cols; if (missing_cols == no_cols) { done = true; - break; + } + else if (i == (no_cols-1)) + { + while (new_missing_cols > 0) + { + sink_.push_back(field_delimiter_); + --new_missing_cols; + } } } else { - for (std::size_t i = 0; i < missing_cols; ++i) + while (new_missing_cols > 0) { sink_.push_back(field_delimiter_); + --new_missing_cols; } if (!first) { diff --git a/test/csv/src/csv_encoder_tests.cpp b/test/csv/src/csv_encoder_tests.cpp index caf4907b47..b473f2a220 100644 --- a/test/csv/src/csv_encoder_tests.cpp +++ b/test/csv/src/csv_encoder_tests.cpp @@ -806,7 +806,6 @@ true,1948-01-01T14:57:13,1.27,Chicago Sun-Times CHECK(expected == buf); } -#endif SECTION("array of arrays to csv with column_names") { std::string expected = R"(text,float @@ -857,5 +856,37 @@ Chicago Sun-Times,1.27 j.dump(encoder); CHECK(expected == buf); } +#endif + + SECTION("object of arrays and subarrays to csv with column_names") + { + std::string expected = R"(b,c,a +7;8;9,15,1;true;null +10;11;12,16,-4;5.5;6 +,17, +)"; + + const std::string jtext = R"( +{ + "a" : [[1,true,null],[-4,5.5,"6"]], + "b" : [[7,8,9],[10,11,12]], + "c" : [15,16,17] +} + )"; + + auto j = jsoncons::json::parse(jtext); + //std::cout << pretty_print(j) << "\n"; + + auto options = csv::csv_options{} + .subfield_delimiter(';') + .column_names("b,c,a"); + + std::string buf; + csv::csv_string_encoder encoder(buf, options); + j.dump(encoder); + + //std::cout << buf << "\n"; + CHECK(expected == buf); + } } From cc2b5ddd8006021027424706e1762f265cc7d149 Mon Sep 17 00:00:00 2001 From: Daniel Parker Date: Wed, 22 Jan 2025 22:52:01 -0500 Subject: [PATCH 74/79] csv_encoder --- test/csv/src/encode_decode_csv_tests.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/csv/src/encode_decode_csv_tests.cpp b/test/csv/src/encode_decode_csv_tests.cpp index 64c2ddc4a0..bae7533f86 100644 --- a/test/csv/src/encode_decode_csv_tests.cpp +++ b/test/csv/src/encode_decode_csv_tests.cpp @@ -204,7 +204,7 @@ TEMPLATE_TEST_CASE("test_csv_encoder_reset", "", // Missing column and array end f.encoder.flush(); - CHECK("h1,h2\n1" == f.string1()); + CHECK("h1,h2\n" == f.string1()); f.encoder.reset(); f.encoder.begin_array(); f.encoder.begin_array(); @@ -217,7 +217,7 @@ TEMPLATE_TEST_CASE("test_csv_encoder_reset", "", f.encoder.end_array(); f.encoder.end_array(); f.encoder.flush(); - CHECK("h1,h2\n1h3,h4\n3,4\n" == f.string1()); + CHECK("h1,h2\nh3,h4\n3,4\n" == f.string1()); // Reset and encode to different sink f.encoder.reset(f.output2); From 0397c7ebcf01165b149d10e47b3e513706f9f794 Mon Sep 17 00:00:00 2001 From: Daniel Parker Date: Thu, 23 Jan 2025 09:58:01 -0500 Subject: [PATCH 75/79] csv_encoder --- include/jsoncons_ext/csv/csv_encoder.hpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/include/jsoncons_ext/csv/csv_encoder.hpp b/include/jsoncons_ext/csv/csv_encoder.hpp index 298b943530..d8472d24d1 100644 --- a/include/jsoncons_ext/csv/csv_encoder.hpp +++ b/include/jsoncons_ext/csv/csv_encoder.hpp @@ -170,9 +170,13 @@ class basic_csv_encoder final : public basic_json_visitor neginf_to_str_(options.neginf_to_str()), alloc_(alloc), fp_(options.float_format(), options.precision()), + column_names_(alloc), + column_paths_(alloc), + column_path_name_map_(alloc), + column_path_value_map_(alloc), + column_path_column_map_(alloc), buffer_(alloc), value_buffer_(alloc), - column_path_column_map_(alloc), column_it_(column_path_column_map_.end()) { if (has_column_mapping_) From 7744945b6b7b230a99490d19565ef8ec21037190 Mon Sep 17 00:00:00 2001 From: Daniel Parker Date: Thu, 23 Jan 2025 10:41:52 -0500 Subject: [PATCH 76/79] csv_encoder --- include/jsoncons_ext/csv/csv_encoder.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/jsoncons_ext/csv/csv_encoder.hpp b/include/jsoncons_ext/csv/csv_encoder.hpp index d8472d24d1..0f80132856 100644 --- a/include/jsoncons_ext/csv/csv_encoder.hpp +++ b/include/jsoncons_ext/csv/csv_encoder.hpp @@ -132,7 +132,7 @@ class basic_csv_encoder final : public basic_json_visitor std::size_t column_index_{0}; string_type buffer_; string_type value_buffer_; - column_path_column_map_type::iterator column_it_; + typename column_path_column_map_type::iterator column_it_; // Noncopyable and nonmoveable basic_csv_encoder(const basic_csv_encoder&) = delete; From 595264f420edfea9c0b751c0ee01b15338db5845 Mon Sep 17 00:00:00 2001 From: Daniel Parker Date: Thu, 23 Jan 2025 12:21:22 -0500 Subject: [PATCH 77/79] csv_encoder use json_pointer --- include/jsoncons_ext/csv/csv_encoder.hpp | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/include/jsoncons_ext/csv/csv_encoder.hpp b/include/jsoncons_ext/csv/csv_encoder.hpp index 0f80132856..0487bbeb4a 100644 --- a/include/jsoncons_ext/csv/csv_encoder.hpp +++ b/include/jsoncons_ext/csv/csv_encoder.hpp @@ -23,6 +23,7 @@ #include #include #include +#include namespace jsoncons { namespace csv { @@ -33,11 +34,12 @@ class basic_csv_encoder final : public basic_json_visitor using char_type = CharT; using typename basic_json_visitor::string_view_type; using sink_type = Sink; - using allocator_type = Allocator; using char_allocator_type = typename std::allocator_traits:: template rebind_alloc; using string_type = std::basic_string, char_allocator_type>; + using json_pointer_type = jsonpointer::basic_json_pointer; using string_allocator_type = typename std::allocator_traits:: template rebind_alloc; + using json_pointer_allocator_type = typename std::allocator_traits:: template rebind_alloc; using string_string_allocator_type = typename std::allocator_traits:: template rebind_alloc>; using string_vector_allocator_type = typename std::allocator_traits:: template rebind_alloc>>; using column_type = std::vector; @@ -124,7 +126,7 @@ class basic_csv_encoder final : public basic_json_visitor jsoncons::detail::write_double fp_; std::vector column_names_; - std::vector column_paths_; + std::vector column_paths_; std::unordered_map,std::equal_to,string_string_allocator_type> column_path_name_map_; std::unordered_map,std::equal_to,string_string_allocator_type> column_path_value_map_; column_path_column_map_type column_path_column_map_; @@ -340,7 +342,7 @@ class basic_csv_encoder final : public basic_json_visitor bool first = true; for (std::size_t i = 0; i < column_paths_.size(); ++i) { - auto it = column_path_name_map_.find(column_paths_[i]); + auto it = column_path_name_map_.find(column_paths_[i].string()); if (it != column_path_name_map_.end()) { if (!first) @@ -362,7 +364,7 @@ class basic_csv_encoder final : public basic_json_visitor { sink_.push_back(field_delimiter_); } - auto it = column_path_value_map_.find(column_paths_[i]); + auto it = column_path_value_map_.find(column_paths_[i].string()); if (it != column_path_value_map_.end()) { sink_.append(it->second.data(), it->second.length()); @@ -379,7 +381,7 @@ class basic_csv_encoder final : public basic_json_visitor bool first = true; for (std::size_t i = 0; i < column_paths_.size(); ++i) { - auto it = column_path_name_map_.find(column_paths_[i]); + auto it = column_path_name_map_.find(column_paths_[i].string()); if (it != column_path_name_map_.end()) { if (!first) @@ -396,7 +398,7 @@ class basic_csv_encoder final : public basic_json_visitor std::vector> columns; for (const auto& item : column_paths_) { - auto it = column_path_column_map_.find(item); + auto it = column_path_column_map_.find(item.string()); if (it != column_path_column_map_.end()) { columns.emplace_back((*it).second.cbegin(), (*it).second.cend()); @@ -601,7 +603,7 @@ class basic_csv_encoder final : public basic_json_visitor std::size_t col = 0; for (std::size_t i = 0; i < column_paths_.size(); ++i) { - auto it = column_path_name_map_.find(column_paths_[i]); + auto it = column_path_name_map_.find(column_paths_[i].string()); if (it != column_path_name_map_.end()) { if (col > 0) @@ -621,7 +623,7 @@ class basic_csv_encoder final : public basic_json_visitor { sink_.push_back(field_delimiter_); } - auto it = column_path_value_map_.find(column_paths_[i]); + auto it = column_path_value_map_.find(column_paths_[i].string()); if (it != column_path_value_map_.end()) { sink_.append(it->second.data(), it->second.length()); @@ -648,7 +650,7 @@ class basic_csv_encoder final : public basic_json_visitor std::size_t col = 0; for (std::size_t i = 0; i < column_paths_.size(); ++i) { - auto it = column_path_name_map_.find(column_paths_[i]); + auto it = column_path_name_map_.find(column_paths_[i].string()); if (it != column_path_name_map_.end()) { if (col > 0) @@ -669,7 +671,7 @@ class basic_csv_encoder final : public basic_json_visitor { sink_.push_back(field_delimiter_); } - auto it = column_path_value_map_.find(column_paths_[i]); + auto it = column_path_value_map_.find(column_paths_[i].string()); if (it != column_path_value_map_.end()) { sink_.append(it->second.data(), it->second.length()); From 179d71b8947bd3bcdaae1328f5ff25cc653ad2dc Mon Sep 17 00:00:00 2001 From: Daniel Parker Date: Thu, 23 Jan 2025 12:50:50 -0500 Subject: [PATCH 78/79] csv_encoder use json_pointer --- include/jsoncons_ext/csv/csv_encoder.hpp | 17 +++++++------- .../jsoncons_ext/jsonpointer/jsonpointer.hpp | 22 +++++++++++++++++++ test/csv/src/csv_encoder_tests.cpp | 20 ++++++++--------- 3 files changed, 41 insertions(+), 18 deletions(-) diff --git a/include/jsoncons_ext/csv/csv_encoder.hpp b/include/jsoncons_ext/csv/csv_encoder.hpp index 0487bbeb4a..47180b0cd5 100644 --- a/include/jsoncons_ext/csv/csv_encoder.hpp +++ b/include/jsoncons_ext/csv/csv_encoder.hpp @@ -37,10 +37,11 @@ class basic_csv_encoder final : public basic_json_visitor using allocator_type = Allocator; using char_allocator_type = typename std::allocator_traits:: template rebind_alloc; using string_type = std::basic_string, char_allocator_type>; - using json_pointer_type = jsonpointer::basic_json_pointer; + using jpointer_type = jsonpointer::basic_json_pointer; using string_allocator_type = typename std::allocator_traits:: template rebind_alloc; - using json_pointer_allocator_type = typename std::allocator_traits:: template rebind_alloc; + using jpointer_allocator_type = typename std::allocator_traits:: template rebind_alloc; using string_string_allocator_type = typename std::allocator_traits:: template rebind_alloc>; + using jpointer_string_allocator_type = typename std::allocator_traits:: template rebind_alloc>; using string_vector_allocator_type = typename std::allocator_traits:: template rebind_alloc>>; using column_type = std::vector; using column_path_column_map_type = std::unordered_map,std::equal_to,string_vector_allocator_type>; @@ -126,8 +127,8 @@ class basic_csv_encoder final : public basic_json_visitor jsoncons::detail::write_double fp_; std::vector column_names_; - std::vector column_paths_; - std::unordered_map,std::equal_to,string_string_allocator_type> column_path_name_map_; + std::vector column_paths_; + std::unordered_map,std::equal_to,jpointer_string_allocator_type> column_path_name_map_; std::unordered_map,std::equal_to,string_string_allocator_type> column_path_value_map_; column_path_column_map_type column_path_column_map_; @@ -342,7 +343,7 @@ class basic_csv_encoder final : public basic_json_visitor bool first = true; for (std::size_t i = 0; i < column_paths_.size(); ++i) { - auto it = column_path_name_map_.find(column_paths_[i].string()); + auto it = column_path_name_map_.find(column_paths_[i]); if (it != column_path_name_map_.end()) { if (!first) @@ -381,7 +382,7 @@ class basic_csv_encoder final : public basic_json_visitor bool first = true; for (std::size_t i = 0; i < column_paths_.size(); ++i) { - auto it = column_path_name_map_.find(column_paths_[i].string()); + auto it = column_path_name_map_.find(column_paths_[i]); if (it != column_path_name_map_.end()) { if (!first) @@ -603,7 +604,7 @@ class basic_csv_encoder final : public basic_json_visitor std::size_t col = 0; for (std::size_t i = 0; i < column_paths_.size(); ++i) { - auto it = column_path_name_map_.find(column_paths_[i].string()); + auto it = column_path_name_map_.find(column_paths_[i]); if (it != column_path_name_map_.end()) { if (col > 0) @@ -650,7 +651,7 @@ class basic_csv_encoder final : public basic_json_visitor std::size_t col = 0; for (std::size_t i = 0; i < column_paths_.size(); ++i) { - auto it = column_path_name_map_.find(column_paths_[i].string()); + auto it = column_path_name_map_.find(column_paths_[i]); if (it != column_path_name_map_.end()) { if (col > 0) diff --git a/include/jsoncons_ext/jsonpointer/jsonpointer.hpp b/include/jsoncons_ext/jsonpointer/jsonpointer.hpp index 7a390e5c10..25e00bae57 100644 --- a/include/jsoncons_ext/jsonpointer/jsonpointer.hpp +++ b/include/jsoncons_ext/jsonpointer/jsonpointer.hpp @@ -1443,4 +1443,26 @@ namespace jsoncons { namespace jsonpointer { } // namespace jsonpointer } // namespace jsoncons +namespace std { + template + struct hash> + { + std::size_t operator()(const jsoncons::jsonpointer::basic_json_pointer& ptr) const noexcept + { + constexpr std::uint64_t prime{0x100000001B3}; + std::uint64_t result{0xcbf29ce484222325}; + + for (const auto& str : ptr) + { + for (std::size_t i = 0; i < str.length(); ++i) + { + result = (result * prime) ^ str[i]; + } + } + return result; + } + }; + +} // namespace std + #endif diff --git a/test/csv/src/csv_encoder_tests.cpp b/test/csv/src/csv_encoder_tests.cpp index b473f2a220..ffef049820 100644 --- a/test/csv/src/csv_encoder_tests.cpp +++ b/test/csv/src/csv_encoder_tests.cpp @@ -9,7 +9,7 @@ namespace csv = jsoncons::csv; TEST_CASE("test json to flat csv with column mappings") { -#if 0 +//#if 0 SECTION("array of objects to csv") { std::string expected = R"(Number,Date Time @@ -122,12 +122,12 @@ TEST_CASE("test json to flat csv with column mappings") CHECK(expected == buf); } -#endif +//#endif } TEST_CASE("test json to flat csv") { -#if 0 +//#if 0 SECTION("array of objects to csv") { std::string expected = R"(boolean,datetime,float,text @@ -405,12 +405,12 @@ NY,LON,TOR;LON //std::cout << buf << "\n"; CHECK(expected == buf); } -#endif +//#endif } TEST_CASE("test json to non-flat csv with column mappings") { -#if 0 +//#if 0 SECTION("array of objects to csv") { std::string expected = R"(Number,Date Time @@ -523,12 +523,12 @@ TEST_CASE("test json to non-flat csv with column mappings") CHECK(expected == buf); } -#endif +//#endif } TEST_CASE("test json to non-flat csv") { -#if 0 +//#if 0 SECTION("array of objects to csv") { std::string expected = R"(/boolean,/datetime,/float,/nested/nested/date,/nested/nested/integer,/nested/time,/text @@ -719,13 +719,13 @@ NY,LON,TOR;LON CHECK(expected == buf); } -#endif +//#endif } TEST_CASE("test json to flat csv with column names") { -#if 0 +//#if 0 SECTION("array of objects to csv") { std::string expected = R"(boolean,datetime,float,text @@ -856,7 +856,7 @@ Chicago Sun-Times,1.27 j.dump(encoder); CHECK(expected == buf); } -#endif +//#endif SECTION("object of arrays and subarrays to csv with column_names") { From 96e06a5ec7cb1c6ea0f9c491b47ac2d0b6992e5e Mon Sep 17 00:00:00 2001 From: Daniel Parker Date: Thu, 23 Jan 2025 13:25:38 -0500 Subject: [PATCH 79/79] csv_encoder use json_pointer --- include/jsoncons_ext/csv/csv_encoder.hpp | 51 ++++++++++++------------ 1 file changed, 26 insertions(+), 25 deletions(-) diff --git a/include/jsoncons_ext/csv/csv_encoder.hpp b/include/jsoncons_ext/csv/csv_encoder.hpp index 47180b0cd5..4a1efb36df 100644 --- a/include/jsoncons_ext/csv/csv_encoder.hpp +++ b/include/jsoncons_ext/csv/csv_encoder.hpp @@ -40,11 +40,10 @@ class basic_csv_encoder final : public basic_json_visitor using jpointer_type = jsonpointer::basic_json_pointer; using string_allocator_type = typename std::allocator_traits:: template rebind_alloc; using jpointer_allocator_type = typename std::allocator_traits:: template rebind_alloc; - using string_string_allocator_type = typename std::allocator_traits:: template rebind_alloc>; using jpointer_string_allocator_type = typename std::allocator_traits:: template rebind_alloc>; - using string_vector_allocator_type = typename std::allocator_traits:: template rebind_alloc>>; + using jpointer_vector_allocator_type = typename std::allocator_traits:: template rebind_alloc>>; using column_type = std::vector; - using column_path_column_map_type = std::unordered_map,std::equal_to,string_vector_allocator_type>; + using column_path_column_map_type = std::unordered_map,std::equal_to,jpointer_vector_allocator_type>; private: static jsoncons::basic_string_view null_constant() { @@ -76,12 +75,12 @@ class basic_csv_encoder final : public basic_json_visitor multivalued_field, column_multivalued_field }; - + struct stack_item { stack_item_kind item_kind_; std::size_t count_{0}; - std::string column_path_; + jpointer_type column_path_; stack_item(stack_item_kind item_kind) noexcept : item_kind_(item_kind) @@ -99,6 +98,12 @@ class basic_csv_encoder final : public basic_json_visitor } }; + static const stack_item& parent(const std::vector& stack) + { + JSONCONS_ASSERT(stack.size() >= 2); + return stack[stack.size() - 2]; + } + Sink sink_; bool flat_; bool has_column_mapping_; @@ -129,7 +134,7 @@ class basic_csv_encoder final : public basic_json_visitor std::vector column_names_; std::vector column_paths_; std::unordered_map,std::equal_to,jpointer_string_allocator_type> column_path_name_map_; - std::unordered_map,std::equal_to,string_string_allocator_type> column_path_value_map_; + std::unordered_map,std::equal_to,jpointer_string_allocator_type> column_path_value_map_; column_path_column_map_type column_path_column_map_; std::size_t column_index_{0}; @@ -310,7 +315,7 @@ class basic_csv_encoder final : public basic_json_visitor } else { - stack_.back().column_path_ = stack_[stack_.size()-2].column_path_; + stack_.back().column_path_ = parent(stack_).column_path_; value_buffer_.clear(); stack_.emplace_back(stack_item_kind::multivalued_field); } @@ -336,7 +341,7 @@ class basic_csv_encoder final : public basic_json_visitor { case stack_item_kind::flat_object: case stack_item_kind::object: - if (stack_[stack_.size()-2].item_kind_ == stack_item_kind::row_mapping || stack_[stack_.size()-2].item_kind_ == stack_item_kind::flat_row_mapping) + if (parent(stack_).item_kind_ == stack_item_kind::row_mapping || parent(stack_).item_kind_ == stack_item_kind::flat_row_mapping) { if (stack_[0].count_ == 0) { @@ -365,7 +370,7 @@ class basic_csv_encoder final : public basic_json_visitor { sink_.push_back(field_delimiter_); } - auto it = column_path_value_map_.find(column_paths_[i].string()); + auto it = column_path_value_map_.find(column_paths_[i]); if (it != column_path_value_map_.end()) { sink_.append(it->second.data(), it->second.length()); @@ -399,7 +404,7 @@ class basic_csv_encoder final : public basic_json_visitor std::vector> columns; for (const auto& item : column_paths_) { - auto it = column_path_column_map_.find(item.string()); + auto it = column_path_column_map_.find(item); if (it != column_path_column_map_.end()) { columns.emplace_back((*it).second.cbegin(), (*it).second.cend()); @@ -597,7 +602,7 @@ class basic_csv_encoder final : public basic_json_visitor case stack_item_kind::flat_row_mapping: break; case stack_item_kind::flat_row: - if (stack_[stack_.size()-2].item_kind_ == stack_item_kind::flat_row_mapping) + if (parent(stack_).item_kind_ == stack_item_kind::flat_row_mapping) { if (stack_[0].count_ == 0 && !column_path_name_map_.empty()) { @@ -624,7 +629,7 @@ class basic_csv_encoder final : public basic_json_visitor { sink_.push_back(field_delimiter_); } - auto it = column_path_value_map_.find(column_paths_[i].string()); + auto it = column_path_value_map_.find(column_paths_[i]); if (it != column_path_value_map_.end()) { sink_.append(it->second.data(), it->second.length()); @@ -644,7 +649,7 @@ class basic_csv_encoder final : public basic_json_visitor break; } case stack_item_kind::row: - if (stack_[stack_.size()-2].item_kind_ == stack_item_kind::row_mapping) + if (parent(stack_).item_kind_ == stack_item_kind::row_mapping) { if (stack_[0].count_ == 0) { @@ -672,7 +677,7 @@ class basic_csv_encoder final : public basic_json_visitor { sink_.push_back(field_delimiter_); } - auto it = column_path_value_map_.find(column_paths_[i].string()); + auto it = column_path_value_map_.find(column_paths_[i]); if (it != column_path_value_map_.end()) { sink_.append(it->second.data(), it->second.length()); @@ -711,8 +716,7 @@ class basic_csv_encoder final : public basic_json_visitor { case stack_item_kind::flat_object: { - stack_.back().column_path_ = stack_[stack_.size()-2].column_path_; - stack_.back().column_path_.push_back('/'); + stack_.back().column_path_ = parent(stack_).column_path_; stack_.back().column_path_.append(std::string(name)); if (!has_column_mapping_) { @@ -722,26 +726,24 @@ class basic_csv_encoder final : public basic_json_visitor } case stack_item_kind::object: { - stack_.back().column_path_ = stack_[stack_.size()-2].column_path_; - stack_.back().column_path_.push_back('/'); + stack_.back().column_path_ = parent(stack_).column_path_; stack_.back().column_path_.append(std::string(name)); if (!has_column_mapping_) { - column_path_name_map_.emplace(stack_.back().column_path_, stack_.back().column_path_); + column_path_name_map_.emplace(stack_.back().column_path_, stack_.back().column_path_.string()); } break; } case stack_item_kind::column_mapping: { - stack_.back().column_path_.erase(); - stack_.back().column_path_.push_back('/'); + stack_.back().column_path_.clear(); stack_.back().column_path_.append(std::string(name)); if (!has_column_mapping_) { column_path_name_map_.emplace(stack_.back().column_path_, name); column_paths_.emplace_back(stack_.back().column_path_); } - column_it_ = column_path_column_map_.emplace(stack_.back().column_path_, column_type{}).first; + column_it_ = column_path_column_map_.emplace(stack_.back().column_path_, column_type{alloc_}).first; break; } default: @@ -755,8 +757,7 @@ class basic_csv_encoder final : public basic_json_visitor buffer_.clear(); jsoncons::detail::from_integer(stack_.back().count_, buffer_); - stack_.back().column_path_ = stack_[stack_.size()-2].column_path_; - stack_.back().column_path_.push_back('/'); + stack_.back().column_path_ = parent(stack_).column_path_; stack_.back().column_path_.append(buffer_); if (stack_[0].count_ == 0) { @@ -766,7 +767,7 @@ class basic_csv_encoder final : public basic_json_visitor column_path_value_map_.emplace(stack_.back().column_path_, string_type{alloc_}); if (stack_.back().item_kind_ == stack_item_kind::row) { - column_path_name_map_.emplace(stack_.back().column_path_, stack_.back().column_path_); + column_path_name_map_.emplace(stack_.back().column_path_, stack_.back().column_path_.string()); } } }