From 56f9733abd36f04934d5ecd69d57281e9d1da90a Mon Sep 17 00:00:00 2001 From: fktn Date: Mon, 14 Oct 2024 15:25:05 +0900 Subject: [PATCH] Fix wrong parse result from single scalar document (#411) * fixed wrong parse result from single scalar document * clean up unused codes --- include/fkYAML/detail/input/deserializer.hpp | 111 ++++++++++++++----- single_include/fkYAML/node.hpp | 111 ++++++++++++++----- test/unit_test/test_deserializer_class.cpp | 46 +++++++- 3 files changed, 212 insertions(+), 56 deletions(-) diff --git a/include/fkYAML/detail/input/deserializer.hpp b/include/fkYAML/detail/input/deserializer.hpp index 19b254ad..ad3bb5a1 100644 --- a/include/fkYAML/detail/input/deserializer.hpp +++ b/include/fkYAML/detail/input/deserializer.hpp @@ -203,19 +203,48 @@ class basic_deserializer { lexer.get_lines_processed(), lexer.get_last_token_begin_pos(), context_state_t::FLOW_MAPPING, &root); token = lexer.get_next_token(); break; - default: { + case lexical_token_t::EXPLICIT_KEY_PREFIX: { + // If the explicit key prefix (? ) is detected here, the root node of current document must be a mapping. + // Also, tag and anchor if any are associated to the root mapping node. + // No get_next_token() call here to handle the token event in the deserialize_node() function. root = basic_node_type::mapping(); apply_directive_set(root); - if (found_props && line < lexer.get_lines_processed()) { - // If node properties and a followed node are on the different line, the properties belong to the root - // node. - apply_node_properties(root); - } + apply_node_properties(root); parse_context context( lexer.get_lines_processed(), lexer.get_last_token_begin_pos(), context_state_t::BLOCK_MAPPING, &root); m_context_stack.emplace_back(std::move(context)); break; } + case lexical_token_t::BLOCK_LITERAL_SCALAR: + case lexical_token_t::BLOCK_FOLDED_SCALAR: + // If a block scalar token is detected here, current document contains single scalar. + // Do nothing here since the token is handled in the deserialize_node() function. + break; + case lexical_token_t::PLAIN_SCALAR: + case lexical_token_t::SINGLE_QUOTED_SCALAR: + case lexical_token_t::DOUBLE_QUOTED_SCALAR: + case lexical_token_t::ALIAS_PREFIX: + // Defer handling the above token events until the next call on the deserialize_scalar() function since the + // meaning depends on subsequent events. + if (found_props && line < lexer.get_lines_processed()) { + // If node properties and a followed node are on the different line, the properties belong to the root + // node. + if (m_needs_anchor_impl) { + m_root_anchor_name = std::move(m_anchor_name); + m_needs_anchor_impl = false; + m_anchor_name.clear(); + } + + if (m_needs_tag_impl) { + m_root_tag_name = std::move(m_tag_name); + m_needs_tag_impl = false; + m_tag_name.clear(); + } + } + break; + default: + // Do nothing since current document has no contents. + break; } mp_current_node = &root; @@ -1092,14 +1121,21 @@ class basic_deserializer { // a scalar node *mp_current_node = std::move(node_value); - if (m_flow_context_depth > 0 || m_context_stack.back().state != context_state_t::BLOCK_MAPPING_EXPLICIT_KEY) { - m_context_stack.pop_back(); - mp_current_node = m_context_stack.back().p_node; + if FK_YAML_LIKELY (!m_context_stack.empty()) { + if (m_flow_context_depth > 0 || + m_context_stack.back().state != context_state_t::BLOCK_MAPPING_EXPLICIT_KEY) { + m_context_stack.pop_back(); + mp_current_node = m_context_stack.back().p_node; - if (m_flow_context_depth > 0) { - m_flow_token_state = flow_token_state_t::NEEDS_SEPARATOR_OR_SUFFIX; + if (m_flow_context_depth > 0) { + m_flow_token_state = flow_token_state_t::NEEDS_SEPARATOR_OR_SUFFIX; + } } } + else { + // single scalar document. + return; + } } /// @brief Deserialize a detected scalar node. @@ -1137,24 +1173,43 @@ class basic_deserializer { } if (mp_current_node->is_scalar()) { - parse_context& cur_context = m_context_stack.back(); - switch (cur_context.state) { - case context_state_t::BLOCK_MAPPING_EXPLICIT_KEY: - case context_state_t::BLOCK_MAPPING_EXPLICIT_VALUE: - m_context_stack.emplace_back(line, indent, context_state_t::BLOCK_MAPPING, mp_current_node); - break; - default: - if FK_YAML_UNLIKELY (cur_context.line == line) { - throw parse_error("Multiple mapping keys are specified on the same line.", line, indent); + if FK_YAML_LIKELY (!m_context_stack.empty()) { + parse_context& cur_context = m_context_stack.back(); + switch (cur_context.state) { + case context_state_t::BLOCK_MAPPING_EXPLICIT_KEY: + case context_state_t::BLOCK_MAPPING_EXPLICIT_VALUE: + m_context_stack.emplace_back(line, indent, context_state_t::BLOCK_MAPPING, mp_current_node); + break; + default: + if FK_YAML_UNLIKELY (cur_context.line == line) { + throw parse_error("Multiple mapping keys are specified on the same line.", line, indent); + } + cur_context.line = line; + cur_context.indent = indent; + cur_context.state = context_state_t::BLOCK_MAPPING; + break; } - cur_context.line = line; - cur_context.indent = indent; - cur_context.state = context_state_t::BLOCK_MAPPING; - break; + + *mp_current_node = basic_node_type::mapping(); + apply_directive_set(*mp_current_node); } + else { + // root mapping node - *mp_current_node = basic_node_type::mapping(); - apply_directive_set(*mp_current_node); + m_context_stack.emplace_back(line, indent, context_state_t::BLOCK_MAPPING, mp_current_node); + *mp_current_node = basic_node_type::mapping(); + apply_directive_set(*mp_current_node); + + // apply node properties if any to the root mapping node. + if (!m_root_anchor_name.empty()) { + mp_current_node->add_anchor_name(std::move(m_root_anchor_name)); + m_root_anchor_name.clear(); + } + if (!m_root_tag_name.empty()) { + mp_current_node->add_tag_name(std::move(m_root_tag_name)); + m_root_tag_name.clear(); + } + } } add_new_key(std::move(node), line, indent); } @@ -1213,6 +1268,10 @@ class basic_deserializer { std::string m_anchor_name {}; /// The last tag name. std::string m_tag_name {}; + /// The root YAML anchor name. (maybe empty and unused) + std::string m_root_anchor_name {}; + /// The root tag name. (maybe empty and unused) + std::string m_root_tag_name {}; }; FK_YAML_DETAIL_NAMESPACE_END diff --git a/single_include/fkYAML/node.hpp b/single_include/fkYAML/node.hpp index 6a1d8f21..d86d8f8e 100644 --- a/single_include/fkYAML/node.hpp +++ b/single_include/fkYAML/node.hpp @@ -6726,19 +6726,48 @@ class basic_deserializer { lexer.get_lines_processed(), lexer.get_last_token_begin_pos(), context_state_t::FLOW_MAPPING, &root); token = lexer.get_next_token(); break; - default: { + case lexical_token_t::EXPLICIT_KEY_PREFIX: { + // If the explicit key prefix (? ) is detected here, the root node of current document must be a mapping. + // Also, tag and anchor if any are associated to the root mapping node. + // No get_next_token() call here to handle the token event in the deserialize_node() function. root = basic_node_type::mapping(); apply_directive_set(root); - if (found_props && line < lexer.get_lines_processed()) { - // If node properties and a followed node are on the different line, the properties belong to the root - // node. - apply_node_properties(root); - } + apply_node_properties(root); parse_context context( lexer.get_lines_processed(), lexer.get_last_token_begin_pos(), context_state_t::BLOCK_MAPPING, &root); m_context_stack.emplace_back(std::move(context)); break; } + case lexical_token_t::BLOCK_LITERAL_SCALAR: + case lexical_token_t::BLOCK_FOLDED_SCALAR: + // If a block scalar token is detected here, current document contains single scalar. + // Do nothing here since the token is handled in the deserialize_node() function. + break; + case lexical_token_t::PLAIN_SCALAR: + case lexical_token_t::SINGLE_QUOTED_SCALAR: + case lexical_token_t::DOUBLE_QUOTED_SCALAR: + case lexical_token_t::ALIAS_PREFIX: + // Defer handling the above token events until the next call on the deserialize_scalar() function since the + // meaning depends on subsequent events. + if (found_props && line < lexer.get_lines_processed()) { + // If node properties and a followed node are on the different line, the properties belong to the root + // node. + if (m_needs_anchor_impl) { + m_root_anchor_name = std::move(m_anchor_name); + m_needs_anchor_impl = false; + m_anchor_name.clear(); + } + + if (m_needs_tag_impl) { + m_root_tag_name = std::move(m_tag_name); + m_needs_tag_impl = false; + m_tag_name.clear(); + } + } + break; + default: + // Do nothing since current document has no contents. + break; } mp_current_node = &root; @@ -7615,14 +7644,21 @@ class basic_deserializer { // a scalar node *mp_current_node = std::move(node_value); - if (m_flow_context_depth > 0 || m_context_stack.back().state != context_state_t::BLOCK_MAPPING_EXPLICIT_KEY) { - m_context_stack.pop_back(); - mp_current_node = m_context_stack.back().p_node; + if FK_YAML_LIKELY (!m_context_stack.empty()) { + if (m_flow_context_depth > 0 || + m_context_stack.back().state != context_state_t::BLOCK_MAPPING_EXPLICIT_KEY) { + m_context_stack.pop_back(); + mp_current_node = m_context_stack.back().p_node; - if (m_flow_context_depth > 0) { - m_flow_token_state = flow_token_state_t::NEEDS_SEPARATOR_OR_SUFFIX; + if (m_flow_context_depth > 0) { + m_flow_token_state = flow_token_state_t::NEEDS_SEPARATOR_OR_SUFFIX; + } } } + else { + // single scalar document. + return; + } } /// @brief Deserialize a detected scalar node. @@ -7660,24 +7696,43 @@ class basic_deserializer { } if (mp_current_node->is_scalar()) { - parse_context& cur_context = m_context_stack.back(); - switch (cur_context.state) { - case context_state_t::BLOCK_MAPPING_EXPLICIT_KEY: - case context_state_t::BLOCK_MAPPING_EXPLICIT_VALUE: - m_context_stack.emplace_back(line, indent, context_state_t::BLOCK_MAPPING, mp_current_node); - break; - default: - if FK_YAML_UNLIKELY (cur_context.line == line) { - throw parse_error("Multiple mapping keys are specified on the same line.", line, indent); + if FK_YAML_LIKELY (!m_context_stack.empty()) { + parse_context& cur_context = m_context_stack.back(); + switch (cur_context.state) { + case context_state_t::BLOCK_MAPPING_EXPLICIT_KEY: + case context_state_t::BLOCK_MAPPING_EXPLICIT_VALUE: + m_context_stack.emplace_back(line, indent, context_state_t::BLOCK_MAPPING, mp_current_node); + break; + default: + if FK_YAML_UNLIKELY (cur_context.line == line) { + throw parse_error("Multiple mapping keys are specified on the same line.", line, indent); + } + cur_context.line = line; + cur_context.indent = indent; + cur_context.state = context_state_t::BLOCK_MAPPING; + break; } - cur_context.line = line; - cur_context.indent = indent; - cur_context.state = context_state_t::BLOCK_MAPPING; - break; + + *mp_current_node = basic_node_type::mapping(); + apply_directive_set(*mp_current_node); } + else { + // root mapping node - *mp_current_node = basic_node_type::mapping(); - apply_directive_set(*mp_current_node); + m_context_stack.emplace_back(line, indent, context_state_t::BLOCK_MAPPING, mp_current_node); + *mp_current_node = basic_node_type::mapping(); + apply_directive_set(*mp_current_node); + + // apply node properties if any to the root mapping node. + if (!m_root_anchor_name.empty()) { + mp_current_node->add_anchor_name(std::move(m_root_anchor_name)); + m_root_anchor_name.clear(); + } + if (!m_root_tag_name.empty()) { + mp_current_node->add_tag_name(std::move(m_root_tag_name)); + m_root_tag_name.clear(); + } + } } add_new_key(std::move(node), line, indent); } @@ -7736,6 +7791,10 @@ class basic_deserializer { std::string m_anchor_name {}; /// The last tag name. std::string m_tag_name {}; + /// The root YAML anchor name. (maybe empty and unused) + std::string m_root_anchor_name {}; + /// The root tag name. (maybe empty and unused) + std::string m_root_tag_name {}; }; FK_YAML_DETAIL_NAMESPACE_END diff --git a/test/unit_test/test_deserializer_class.cpp b/test/unit_test/test_deserializer_class.cpp index 1fd76c1d..3de02a4d 100644 --- a/test/unit_test/test_deserializer_class.cpp +++ b/test/unit_test/test_deserializer_class.cpp @@ -15,8 +15,7 @@ TEST_CASE("Deserializer_EmptyInput") { fkyaml::node root; REQUIRE_NOTHROW(root = deserializer.deserialize(fkyaml::detail::input_adapter(" "))); - REQUIRE(root.is_mapping()); - REQUIRE(root.empty()); + REQUIRE(root.is_null()); } TEST_CASE("Deserializer_KeySeparator") { @@ -98,6 +97,12 @@ TEST_CASE("Deserializer_BooleanValue") { REQUIRE_NOTHROW(root = deserializer.deserialize(fkyaml::detail::input_adapter("test:\n - False"))); REQUIRE(root["test"][0].get_value() == false); } + + SECTION("root scalar") { + REQUIRE_NOTHROW(root = deserializer.deserialize(fkyaml::detail::input_adapter("true"))); + REQUIRE(root.is_boolean()); + REQUIRE(root.get_value() == true); + } } TEST_CASE("Deserializer_IntegerKey") { @@ -124,6 +129,12 @@ TEST_CASE("Deserializer_IntegerKey") { REQUIRE_NOTHROW(root = deserializer.deserialize(fkyaml::detail::input_adapter("test:\n - 123"))); REQUIRE(root["test"][0].get_value() == 123); } + + SECTION("root scalar") { + REQUIRE_NOTHROW(root = deserializer.deserialize(fkyaml::detail::input_adapter("123"))); + REQUIRE(root.is_integer()); + REQUIRE(root.get_value() == 123); + } } TEST_CASE("Deserializer_FloatingPointNumberKey") { @@ -150,6 +161,12 @@ TEST_CASE("Deserializer_FloatingPointNumberKey") { REQUIRE_NOTHROW(root = deserializer.deserialize(fkyaml::detail::input_adapter("test:\n - 1.23e-5"))); REQUIRE(root["test"][0].get_value() == 1.23e-5); } + + SECTION("root scalar") { + REQUIRE_NOTHROW(root = deserializer.deserialize(fkyaml::detail::input_adapter("3.14"))); + REQUIRE(root.is_float_number()); + REQUIRE(root.get_value() == 3.14); + } } TEST_CASE("Deserializer_BlockLiteralScalar") { @@ -206,6 +223,17 @@ TEST_CASE("Deserializer_BlockLiteralScalar") { REQUIRE(val_node.is_string()); REQUIRE(val_node.get_value_ref() == "map value"); } + + SECTION("root scalar") { + std::string input = "--- |\n" + " first sentence.\n" + " second sentence.\n" + " last sentence.\n"; + + REQUIRE_NOTHROW(root = deserializer.deserialize(fkyaml::detail::input_adapter(input))); + REQUIRE(root.is_string()); + REQUIRE(root.get_value_ref() == "first sentence.\nsecond sentence.\nlast sentence.\n"); + } } TEST_CASE("Deserializer_BlockFoldedScalar") { @@ -262,6 +290,17 @@ TEST_CASE("Deserializer_BlockFoldedScalar") { REQUIRE(val_node.is_string()); REQUIRE(val_node.get_value_ref() == "map value"); } + + SECTION("root scalar") { + std::string input = "--- >\n" + " first sentence.\n" + " second sentence.\n" + " last sentence.\n"; + + REQUIRE_NOTHROW(root = deserializer.deserialize(fkyaml::detail::input_adapter(input))); + REQUIRE(root.is_string()); + REQUIRE(root.get_value_ref() == "first sentence. second sentence. last sentence.\n"); + } } TEST_CASE("Deserializer_ScalarConversionErrorHandling") { @@ -2189,8 +2228,7 @@ TEST_CASE("Deserializer_InvalidDirective") { fkyaml::node root; REQUIRE_NOTHROW(root = deserializer.deserialize(fkyaml::detail::input_adapter("%INVALID foo bar"))); - REQUIRE(root.is_mapping()); - REQUIRE(root.empty()); + REQUIRE(root.is_null()); } TEST_CASE("Deserializer_Anchor") {