Skip to content

Commit

Permalink
Merge pull request #245 from fktn-k/feature/240_support_explicit_mapping_keys
Browse files Browse the repository at this point in the history

#240 Support explicit block mappings & non-scalar nodes as mapping keys
  • Loading branch information
fktn-k authored Dec 10, 2023
2 parents ffe16ae + 3bcb93d commit e98d1d0
Show file tree
Hide file tree
Showing 5 changed files with 209 additions and 130 deletions.
274 changes: 145 additions & 129 deletions include/fkYAML/detail/input/deserializer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -78,13 +78,86 @@ class basic_deserializer
case lexical_token_t::END_OF_BUFFER:
// This handles an empty input.
break;
case lexical_token_t::EXPLICIT_KEY_PREFIX: {
bool needs_to_move_back = !m_indent_stack.empty() && cur_indent < m_indent_stack.back().first;
if (needs_to_move_back)
{
auto target_itr = std::find_if( // LCOV_EXCL_LINE
m_indent_stack.rbegin(),
m_indent_stack.rend(),
[cur_indent](std::pair<std::size_t, bool> p) { return cur_indent > p.first; });

auto pop_num = std::distance(m_indent_stack.rbegin(), target_itr);
for (auto i = 0; i < pop_num; i++)
{
// move back to the previous container node.
m_current_node = m_node_stack.back();
m_node_stack.pop_back();
m_indent_stack.pop_back();
}
}

type = lexer.get_next_token();
if (type == lexical_token_t::SEQUENCE_BLOCK_PREFIX)
{
m_node_stack.push_back(m_current_node);
m_indent_stack.emplace_back(cur_indent, true);
m_indent_stack.emplace_back(lexer.get_last_token_begin_pos(), false);
m_current_node = new BasicNodeType(node_t::SEQUENCE);
set_yaml_version(*m_current_node);
break;
}

if (m_current_node->is_null())
{
*m_current_node = BasicNodeType::mapping();
}

m_node_stack.push_back(m_current_node);
m_indent_stack.emplace_back(cur_indent, true);
m_current_node = new BasicNodeType();
set_yaml_version(*m_current_node);
cur_indent = lexer.get_last_token_begin_pos();
cur_line = lexer.get_lines_processed();

continue;
}
case lexical_token_t::KEY_SEPARATOR: {
bool is_stack_empty = m_node_stack.empty();
if (is_stack_empty)
{
throw parse_error("A key separator found without key.", cur_line, cur_indent);
}
break;

bool is_implicit = m_indent_stack.empty() || cur_indent > m_indent_stack.back().first;
if (is_implicit)
{
break;
}

while (!m_indent_stack.back().second)
{
m_current_node = m_node_stack.back();
m_node_stack.pop_back();
m_indent_stack.pop_back();
}
BasicNodeType* key_node = m_current_node;
m_node_stack.back()->template get_value_ref<mapping_type&>().emplace(*key_node, BasicNodeType());
m_current_node = &(m_node_stack.back()->operator[](*key_node));
delete key_node;
key_node = nullptr;
m_node_stack.push_back(m_node_stack.back());
m_indent_stack.back().second = false;

type = lexer.get_next_token();
if (type == lexical_token_t::SEQUENCE_BLOCK_PREFIX)
{
*m_current_node = BasicNodeType::sequence();
set_yaml_version(*m_current_node);
}
cur_indent = lexer.get_last_token_begin_pos();
cur_line = lexer.get_lines_processed();
continue;
}
case lexical_token_t::VALUE_SEPARATOR:
break;
Expand Down Expand Up @@ -123,12 +196,12 @@ class basic_deserializer
bool is_empty = m_current_node->empty();
if (is_empty)
{
m_indent_stack.push_back(cur_indent);
m_indent_stack.emplace_back(cur_indent, false);
break;
}

// move back to the previous sequence if necessary.
while (!m_current_node->is_sequence() || cur_indent != m_indent_stack.back())
while (!m_current_node->is_sequence() || cur_indent != m_indent_stack.back().first)
{
m_current_node = m_node_stack.back();
m_node_stack.pop_back();
Expand All @@ -144,7 +217,7 @@ class basic_deserializer
}

// move back to the previous sequence if necessary.
while (!m_current_node->is_sequence() || cur_indent != m_indent_stack.back())
while (!m_current_node->is_sequence() || cur_indent != m_indent_stack.back().first)
{
m_current_node = m_node_stack.back();
m_node_stack.pop_back();
Expand Down Expand Up @@ -194,152 +267,48 @@ class basic_deserializer
m_node_stack.pop_back();
break;
case lexical_token_t::NULL_VALUE: {
if (m_current_node->is_mapping())
bool do_continue =
deserialize_scalar(lexer, BasicNodeType(lexer.get_null()), cur_indent, cur_line, type);
if (do_continue)
{
add_new_key(BasicNodeType(), cur_indent, cur_line);
break;
}

if (m_current_node->is_sequence())
{
type = lexer.get_next_token();

// check if the current target token is a key or not.
if (type == lexical_token_t::KEY_SEPARATOR || type == lexical_token_t::MAPPING_BLOCK_PREFIX)
{
add_new_key(BasicNodeType(), cur_indent, cur_line);
cur_indent = lexer.get_last_token_begin_pos();
cur_line = lexer.get_lines_processed();
continue;
}

assign_node_value(BasicNodeType());
cur_indent = lexer.get_last_token_begin_pos();
cur_line = lexer.get_lines_processed();
continue;
}

assign_node_value(BasicNodeType());
break;
}
case lexical_token_t::BOOLEAN_VALUE: {
if (m_current_node->is_mapping())
bool do_continue =
deserialize_scalar(lexer, BasicNodeType(lexer.get_boolean()), cur_indent, cur_line, type);
if (do_continue)
{
add_new_key(lexer.get_boolean(), cur_indent, cur_line);
break;
}

if (m_current_node->is_sequence())
{
boolean_type boolean = lexer.get_boolean();
type = lexer.get_next_token();

// check if the current target token is a key or not.
if (type == lexical_token_t::KEY_SEPARATOR || type == lexical_token_t::MAPPING_BLOCK_PREFIX)
{
add_new_key(boolean, cur_indent, cur_line);
cur_indent = lexer.get_last_token_begin_pos();
cur_line = lexer.get_lines_processed();
continue;
}

assign_node_value(BasicNodeType(boolean));
cur_indent = lexer.get_last_token_begin_pos();
cur_line = lexer.get_lines_processed();
continue;
}

assign_node_value(BasicNodeType(lexer.get_boolean()));
break;
}
case lexical_token_t::INTEGER_VALUE: {
if (m_current_node->is_mapping())
bool do_continue =
deserialize_scalar(lexer, BasicNodeType(lexer.get_integer()), cur_indent, cur_line, type);
if (do_continue)
{
add_new_key(lexer.get_integer(), cur_indent, cur_line);
break;
}

if (m_current_node->is_sequence())
{
integer_type integer = lexer.get_integer();
type = lexer.get_next_token();

// check if the current target token is a key or not.
if (type == lexical_token_t::KEY_SEPARATOR || type == lexical_token_t::MAPPING_BLOCK_PREFIX)
{
add_new_key(integer, cur_indent, cur_line);
cur_indent = lexer.get_last_token_begin_pos();
cur_line = lexer.get_lines_processed();
continue;
}

assign_node_value(BasicNodeType(integer));
cur_indent = lexer.get_last_token_begin_pos();
cur_line = lexer.get_lines_processed();
continue;
}

assign_node_value(BasicNodeType(lexer.get_integer()));
break;
}
case lexical_token_t::FLOAT_NUMBER_VALUE: {
if (m_current_node->is_mapping())
bool do_continue =
deserialize_scalar(lexer, BasicNodeType(lexer.get_float_number()), cur_indent, cur_line, type);
if (do_continue)
{
add_new_key(lexer.get_float_number(), cur_indent, cur_line);
break;
}

if (m_current_node->is_sequence())
{
float_number_type float_val = lexer.get_float_number();
type = lexer.get_next_token();

// check if the current target token is a key or not.
if (type == lexical_token_t::KEY_SEPARATOR || type == lexical_token_t::MAPPING_BLOCK_PREFIX)
{
add_new_key(float_val, cur_indent, cur_line);
cur_indent = lexer.get_last_token_begin_pos();
cur_line = lexer.get_lines_processed();
continue;
}

assign_node_value(BasicNodeType(float_val));
cur_indent = lexer.get_last_token_begin_pos();
cur_line = lexer.get_lines_processed();
continue;
}

assign_node_value(BasicNodeType(lexer.get_float_number()));
break;
}
case lexical_token_t::STRING_VALUE: {
if (m_current_node->is_mapping())
{
add_new_key(lexer.get_string(), cur_indent, cur_line);
break;
}

string_type str = lexer.get_string();
if (m_current_node->is_sequence())
bool do_continue =
deserialize_scalar(lexer, BasicNodeType(lexer.get_string()), cur_indent, cur_line, type);
if (do_continue)
{
type = lexer.get_next_token();

// check if the current target token is a key or not.
if (type == lexical_token_t::KEY_SEPARATOR || type == lexical_token_t::MAPPING_BLOCK_PREFIX)
{
add_new_key(str, cur_indent, cur_line);
cur_indent = lexer.get_last_token_begin_pos();
cur_line = lexer.get_lines_processed();
continue;
}

assign_node_value(BasicNodeType(str));
cur_indent = lexer.get_last_token_begin_pos();
cur_line = lexer.get_lines_processed();
continue;
}

assign_node_value(BasicNodeType(str));
break;
}
case lexical_token_t::END_OF_DIRECTIVES:
Expand Down Expand Up @@ -368,9 +337,12 @@ class basic_deserializer
/// @param key a key string to be added to the current YAML node.
void add_new_key(const BasicNodeType& key, const std::size_t indent, const std::size_t line)
{
if (!m_indent_stack.empty() && indent < m_indent_stack.back())
if (!m_indent_stack.empty() && indent < m_indent_stack.back().first)
{
auto target_itr = std::find(m_indent_stack.rbegin(), m_indent_stack.rend(), indent);
auto target_itr =
std::find_if(m_indent_stack.rbegin(), m_indent_stack.rend(), [indent](std::pair<std::size_t, bool> p) {
return indent == p.first;
});
bool is_indent_valid = (target_itr != m_indent_stack.rend());
if (!is_indent_valid)
{
Expand Down Expand Up @@ -398,7 +370,7 @@ class basic_deserializer
bool is_empty = map.empty();
if (is_empty)
{
m_indent_stack.push_back(indent);
m_indent_stack.emplace_back(indent, false);
}
else
{
Expand Down Expand Up @@ -443,8 +415,52 @@ class basic_deserializer
m_needs_anchor_impl = false;
m_anchor_name.clear();
}
m_current_node = m_node_stack.back();
m_node_stack.pop_back();
if (!m_indent_stack.back().second)
{
m_current_node = m_node_stack.back();
m_node_stack.pop_back();
}
}

/// @brief Deserialize a detected scalar node, deciding whether it becomes a mapping key or a node value.
/// @tparam LexerType The type of the lexical analyzer which supplies tokens.
/// @param lexer The lexical analyzer; queried for the next token and for position/line information.
/// @param node A detected scalar node by a lexer.
/// @param indent The current indentation width. Can be updated in this function.
/// @param line The number of processed lines. Can be updated in this function.
/// @param type The last lexical token type. Overwritten when this function scans ahead one token.
/// @return true if next token has already been got, false otherwise.
template <typename LexerType>
bool deserialize_scalar(
LexerType& lexer, BasicNodeType&& node, std::size_t& indent, std::size_t& line, lexical_token_t& type)
{
// Inside a mapping, a scalar is always a new key; no look-ahead is needed,
// so the caller's current token is still valid (hence return false).
if (m_current_node->is_mapping())
{
add_new_key(node, indent, line);
return false;
}

// Look ahead one token to decide whether this scalar is a key or a value.
type = lexer.get_next_token();
if (type == lexical_token_t::KEY_SEPARATOR || type == lexical_token_t::MAPPING_BLOCK_PREFIX)
{
if (m_current_node->is_scalar())
{
// A separator found on a later line does not belong to this scalar.
if (line != lexer.get_lines_processed())
{
// This path is for explicit mapping key separator(:)
assign_node_value(std::move(node));
indent = lexer.get_last_token_begin_pos();
line = lexer.get_lines_processed();
return true;
}
// The current scalar node is actually a mapping key; convert in place.
*m_current_node = BasicNodeType::mapping();
}
add_new_key(node, indent, line);
}
else
{
// Not followed by a key separator: the scalar is a value for the current node.
assign_node_value(std::move(node));
}
indent = lexer.get_last_token_begin_pos();
line = lexer.get_lines_processed();
return true;
}

/// @brief Set the yaml_version_t object to the given node.
Expand Down Expand Up @@ -472,7 +488,7 @@ class basic_deserializer
/// The stack of YAML nodes.
std::vector<BasicNodeType*> m_node_stack {};
/// The stack of indentation widths.
std::vector<std::size_t> m_indent_stack {};
std::vector<std::pair<std::size_t /*indent*/, bool /*is_explicit_key*/>> m_indent_stack {};
/// The YAML version specification type.
yaml_version_t m_yaml_version {yaml_version_t::VER_1_2};
/// A flag to determine the need for YAML anchor node implementation.
Expand Down
11 changes: 10 additions & 1 deletion include/fkYAML/detail/input/lexical_analyzer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,16 @@ class lexical_analyzer
switch (current)
{
case end_of_input: // end of input buffer
return lexical_token_t::END_OF_BUFFER;
return m_last_token_type = lexical_token_t::END_OF_BUFFER;
case '?':
switch (m_input_handler.get_next())
{
case ' ':
return m_last_token_type = lexical_token_t::EXPLICIT_KEY_PREFIX;
default:
m_input_handler.unget();
return m_last_token_type = scan_string();
}
case ':': // key separator
switch (m_input_handler.get_next())
{
Expand Down
1 change: 1 addition & 0 deletions include/fkYAML/detail/types/lexical_token_t.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ namespace detail
enum class lexical_token_t
{
END_OF_BUFFER, //!< the end of input buffer.
EXPLICIT_KEY_PREFIX, //!< the character for explicit mapping key prefix `?`.
KEY_SEPARATOR, //!< the key separator `:`
VALUE_SEPARATOR, //!< the value separator `,`
ANCHOR_PREFIX, //!< the character for anchor prefix `&`
Expand Down
Loading

0 comments on commit e98d1d0

Please sign in to comment.