diff --git a/include/fkYAML/detail/conversions/from_string.hpp b/include/fkYAML/detail/conversions/from_string.hpp new file mode 100644 index 00000000..cfc3fe4e --- /dev/null +++ b/include/fkYAML/detail/conversions/from_string.hpp @@ -0,0 +1,326 @@ +/** + * _______ __ __ __ _____ __ __ __ + * | __| |_/ | \_/ |/ _ \ / \/ \| | fkYAML: A C++ header-only YAML library + * | __| _ < \_ _/| ___ | _ | |___ version 0.1.1 + * |__| |_| \__| |_| |_| |_|___||___|______| https://github.com/fktn-k/fkYAML + * + * SPDX-FileCopyrightText: 2023 Kensuke Fukutani + * SPDX-License-Identifier: MIT + * + * @file + */ + +#ifndef FK_YAML_DETAIL_CONVERSIONS_FROM_STRING_HPP_ +#define FK_YAML_DETAIL_CONVERSIONS_FROM_STRING_HPP_ + +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +FK_YAML_NAMESPACE_BEGIN + +/** + * @namespace detail + * @brief namespace for internal implementations of fkYAML library. + */ +namespace detail +{ + +using fkyaml::exception; + +/** + * @brief Convert a string YAML token to a ValueType object. + * + * @tparam ValueType A target value type. + * @tparam CharType The type of characters in a source string. + */ +template +inline ValueType from_string(const std::basic_string& s, type_tag /*unused*/); + +/** + * @brief Specialization of from_string() for null values with std::string + * + * @tparam N/A + */ +template <> +inline std::nullptr_t from_string(const std::string& s, type_tag /*unused*/) +{ + if (s == "null" || s == "Null" || s == "NULL" || s == "~") + { + return nullptr; + } + + throw exception("Cannot convert a string into a null value."); +} + +/** + * @brief Specialization of from_string() for boolean values with std::string. 
+ *
+ * @tparam N/A
+ * @param s A source string.
+ * @return bool The boolean value spelled by @a s.
+ * @throw fkyaml::exception if @a s is none of true/True/TRUE/false/False/FALSE.
+ */
+template <>
+inline bool from_string(const std::string& s, type_tag<bool> /*unused*/)
+{
+    if (s == "true" || s == "True" || s == "TRUE")
+    {
+        return true;
+    }
+
+    if (s == "false" || s == "False" || s == "FALSE")
+    {
+        return false;
+    }
+
+    throw exception("Cannot convert a string into a boolean value.");
+}
+
+/**
+ * @brief Specialization of from_string() for int values with std::string.
+ *
+ * @tparam N/A
+ * @param s A source string. The base is auto-detected from its prefix (base 0).
+ * @return int The converted value.
+ * @throw fkyaml::exception if @a s is not entirely an int literal or is out of range.
+ */
+template <>
+inline int from_string(const std::string& s, type_tag<int> /*unused*/)
+{
+    std::size_t idx = 0;
+    int ret = 0;
+
+    try
+    {
+        ret = std::stoi(s, &idx, 0);
+    }
+    catch (const std::exception& /*unused*/)
+    {
+        throw exception("Failed to convert a string into an int value.");
+    }
+
+    // std::stoi() stops at the first invalid character without reporting it (e.g. "12ab" -> 12).
+    // the whole token must be consumed for the conversion to be valid.
+    if (idx != s.size())
+    {
+        throw exception("Failed to convert a string into an int value.");
+    }
+
+    return ret;
+}
+
+/**
+ * @brief Specialization of from_string() for long values with std::string.
+ *
+ * @tparam N/A
+ * @param s A source string. The base is auto-detected from its prefix (base 0).
+ * @return long The converted value.
+ * @throw fkyaml::exception if @a s is not entirely a long literal or is out of range.
+ */
+template <>
+inline long from_string(const std::string& s, type_tag<long> /*unused*/)
+{
+    std::size_t idx = 0;
+    long ret = 0;
+
+    try
+    {
+        ret = std::stol(s, &idx, 0);
+    }
+    catch (const std::exception& /*unused*/)
+    {
+        throw exception("Failed to convert a string into a long value.");
+    }
+
+    // reject tokens which are only partially convertible.
+    if (idx != s.size())
+    {
+        throw exception("Failed to convert a string into a long value.");
+    }
+
+    return ret;
+}
+
+/**
+ * @brief Specialization of from_string() for long long values with std::string.
+ *
+ * @tparam N/A
+ * @param s A source string. The base is auto-detected from its prefix (base 0).
+ * @return long long The converted value.
+ * @throw fkyaml::exception if @a s is not entirely a long long literal or is out of range.
+ */
+template <>
+inline long long from_string(const std::string& s, type_tag<long long> /*unused*/)
+{
+    std::size_t idx = 0;
+    long long ret = 0;
+
+    try
+    {
+        ret = std::stoll(s, &idx, 0);
+    }
+    catch (const std::exception& /*unused*/)
+    {
+        throw exception("Failed to convert a string into a long long value.");
+    }
+
+    // reject tokens which are only partially convertible.
+    if (idx != s.size())
+    {
+        throw exception("Failed to convert a string into a long long value.");
+    }
+
+    return ret;
+}
+
+/**
+ * @brief Partial specialization of from_string() for other signed integer types with std::string.
+ *
+ * @tparam SignedIntType A signed integer type other than long long.
+ * @param s A source string.
+ * @return SignedIntType The converted value.
+ * @throw fkyaml::exception if @a s is not an integer literal or the value does not fit in SignedIntType.
+ */
+template <typename SignedIntType>
+inline enable_if_t<
+    conjunction<
+        is_non_bool_integral<SignedIntType>, std::is_signed<SignedIntType>, negation<std::is_same<SignedIntType, int>>,
+        negation<std::is_same<SignedIntType, long>>, negation<std::is_same<SignedIntType, long long>>>::value,
+    SignedIntType>
+from_string(const std::string& s, type_tag<SignedIntType> /*unused*/)
+{
+    const auto tmp_ret = from_string(s, type_tag<long long> {});
+
+    // both bounds must be checked: a value below min() would otherwise be silently truncated by the cast.
+    if (tmp_ret < static_cast<long long>(std::numeric_limits<SignedIntType>::min()) ||
+        static_cast<long long>(std::numeric_limits<SignedIntType>::max()) < tmp_ret)
+    {
+        throw exception("Failed to convert a long long value into a SignedIntegerType value.");
+    }
+
+    return static_cast<SignedIntType>(tmp_ret);
+}
+
+/**
+ * @brief Specialization of from_string() for unsigned long values with std::string.
+ *
+ * @tparam N/A
+ * @param s A source string. The base is auto-detected from its prefix (base 0).
+ * @return unsigned long The converted value.
+ * @throw fkyaml::exception if @a s is not entirely an unsigned long literal or is out of range.
+ */
+template <>
+inline unsigned long from_string(const std::string& s, type_tag<unsigned long> /*unused*/)
+{
+    std::size_t idx = 0;
+    unsigned long ret = 0;
+
+    try
+    {
+        ret = std::stoul(s, &idx, 0);
+    }
+    catch (const std::exception& /*unused*/)
+    {
+        throw exception("Failed to convert a string into an unsigned long value.");
+    }
+
+    // reject tokens which are only partially convertible (e.g. "12ab").
+    if (idx != s.size())
+    {
+        throw exception("Failed to convert a string into an unsigned long value.");
+    }
+
+    return ret;
+}
+
+/**
+ * @brief Specialization of from_string() for unsigned long long values with std::string.
+ *
+ * @tparam N/A
+ * @param s A source string. The base is auto-detected from its prefix (base 0).
+ * @return unsigned long long The converted value.
+ * @throw fkyaml::exception if @a s is not entirely an unsigned long long literal or is out of range.
+ */
+template <>
+inline unsigned long long from_string(const std::string& s, type_tag<unsigned long long> /*unused*/)
+{
+    std::size_t idx = 0;
+    unsigned long long ret = 0;
+
+    try
+    {
+        ret = std::stoull(s, &idx, 0);
+    }
+    catch (const std::exception& /*unused*/)
+    {
+        throw exception("Failed to convert a string into an unsigned long long value.");
+    }
+
+    // reject tokens which are only partially convertible (e.g. "12ab").
+    if (idx != s.size())
+    {
+        throw exception("Failed to convert a string into an unsigned long long value.");
+    }
+
+    return ret;
+}
+
+/**
+ * @brief Partial specialization of from_string() for other unsigned integer types with std::string.
+ *
+ * @tparam UnsignedIntType An unsigned integer type other than unsigned long long.
+ * @param s A source string.
+ * @return UnsignedIntType The converted value.
+ * @throw fkyaml::exception if @a s is not an integer literal or the value does not fit in UnsignedIntType.
+ */
+template <typename UnsignedIntType>
+inline enable_if_t<
+    conjunction<
+        is_non_bool_integral<UnsignedIntType>, std::is_unsigned<UnsignedIntType>,
+        negation<std::is_same<UnsignedIntType, unsigned long>>,
+        negation<std::is_same<UnsignedIntType, unsigned long long>>>::value,
+    UnsignedIntType>
+from_string(const std::string& s, type_tag<UnsignedIntType> /*unused*/)
+{
+    const auto tmp_ret = from_string(s, type_tag<unsigned long long> {});
+    if (static_cast<unsigned long long>(std::numeric_limits<UnsignedIntType>::max()) < tmp_ret)
+    {
+        throw exception("Failed to convert an unsigned long long into an UnsignedInteger value.");
+    }
+
+    return static_cast<UnsignedIntType>(tmp_ret);
+}
+
+/**
+ * @brief Specialization of from_string() for float values with std::string.
+ *
+ * @tparam N/A
+ * @param s A source string. The YAML spellings of infinity (.inf/-.inf) and NaN (.nan) are accepted.
+ * @return float The converted value.
+ * @throw fkyaml::exception if @a s is not entirely a float literal or is out of range.
+ */
+template <>
+inline float from_string(const std::string& s, type_tag<float> /*unused*/)
+{
+    if (s == ".inf" || s == ".Inf" || s == ".INF")
+    {
+        return std::numeric_limits<float>::infinity();
+    }
+
+    if (s == "-.inf" || s == "-.Inf" || s == "-.INF")
+    {
+        static_assert(std::numeric_limits<float>::is_iec559, "IEEE 754 required.");
+        return -1 * std::numeric_limits<float>::infinity();
+    }
+
+    if (s == ".nan" || s == ".NaN" || s == ".NAN")
+    {
+        return std::nanf("");
+    }
+
+    std::size_t idx = 0;
+    float ret = 0.0f;
+
+    try
+    {
+        ret = std::stof(s, &idx);
+    }
+    catch (const std::exception& /*unused*/)
+    {
+        throw exception("Failed to convert a string into a float value.");
+    }
+
+    // std::stof() silently ignores trailing characters (e.g. ".5ab" -> 0.5); require a full match.
+    if (idx != s.size())
+    {
+        throw exception("Failed to convert a string into a float value.");
+    }
+
+    return ret;
+}
+
+/**
+ * @brief Specialization of from_string() for double values with std::string.
+ *
+ * @tparam N/A
+ * @param s A source string. The YAML spellings of infinity (.inf/-.inf) and NaN (.nan) are accepted.
+ * @return double The converted value.
+ * @throw fkyaml::exception if @a s is not entirely a double literal or is out of range.
+ */
+template <>
+inline double from_string(const std::string& s, type_tag<double> /*unused*/)
+{
+    if (s == ".inf" || s == ".Inf" || s == ".INF")
+    {
+        return std::numeric_limits<double>::infinity();
+    }
+
+    if (s == "-.inf" || s == "-.Inf" || s == "-.INF")
+    {
+        static_assert(std::numeric_limits<double>::is_iec559, "IEEE 754 required.");
+        return -1 * std::numeric_limits<double>::infinity();
+    }
+
+    if (s == ".nan" || s == ".NaN" || s == ".NAN")
+    {
+        return std::nan("");
+    }
+
+    std::size_t idx = 0;
+    double ret = 0.0;
+
+    try
+    {
+        ret = std::stod(s, &idx);
+    }
+    catch (const std::exception& /*unused*/)
+    {
+        throw exception("Failed to convert a string into a double value.");
+    }
+
+    // std::stod() silently ignores trailing characters (e.g. ".5ab" -> 0.5); require a full match.
+    if (idx != s.size())
+    {
+        throw exception("Failed to convert a string into a double value.");
+    }
+
+    return ret;
+}
+
+} // namespace detail
+
+FK_YAML_NAMESPACE_END
+
+#endif /* FK_YAML_DETAIL_CONVERSIONS_FROM_STRING_HPP_ */
\ No newline at end of file
diff --git a/include/fkYAML/detail/input/deserializer.hpp b/include/fkYAML/detail/input/deserializer.hpp index 07983373..510e85ba 100644 --- a/include/fkYAML/detail/input/deserializer.hpp +++ b/include/fkYAML/detail/input/deserializer.hpp @@ -21,6 +21,7 @@ #include #include #include +#include #include #include diff --git a/include/fkYAML/detail/input/input_handler.hpp b/include/fkYAML/detail/input/input_handler.hpp new file mode 100644 index 00000000..014ab6d1 --- /dev/null +++ b/include/fkYAML/detail/input/input_handler.hpp @@ -0,0 +1,186 @@ +/** + * _______ __ __ __ _____ __ __ __ + * | __| |_/ | \_/ |/ _ \ / \/ \| | fkYAML: A C++ header-only YAML library + * | __| _ < \_ _/| ___ | _ | |___ version 0.1.1 + * |__| |_| \__| |_| |_| |_|___||___|______| https://github.com/fktn-k/fkYAML + * + * SPDX-FileCopyrightText: 2023 Kensuke Fukutani + * SPDX-License-Identifier: MIT + * + * @file + */ + +#ifndef FK_YAML_DETAIL_INPUT_INPUT_HANDLER_HPP_ +#define FK_YAML_DETAIL_INPUT_INPUT_HANDLER_HPP_ + +#include +#include +#include + +#include +#include +#include + +/** + * @namespace fkyaml + * @brief namespace for fkYAML library. 
+ */
+FK_YAML_NAMESPACE_BEGIN
+
+/**
+ * @namespace detail
+ * @brief namespace for internal implementations of fkYAML library.
+ */
+namespace detail
+{
+
+/**
+ * @brief An input buffer handler.
+ * @note Characters read from the adapter are cached so that the cursor can be moved backward later.
+ *
+ * @tparam InputAdapterType The type of the input adapter.
+ */
+template <typename InputAdapterType, enable_if_t<is_input_adapter<InputAdapterType>::value, int> = 0>
+class input_handler
+{
+public:
+    //!< The type of character traits of the input buffer.
+    using char_traits_type = std::char_traits<typename InputAdapterType::char_type>;
+    //!< The type of characters of the input buffer.
+    using char_type = typename char_traits_type::char_type;
+    //!< The type of integers for the input buffer.
+    using int_type = typename char_traits_type::int_type;
+    //!< The type of strings of the input buffer.
+    using string_type = std::basic_string<char_type>;
+
+    /**
+     * @brief Construct a new input_handler object.
+     * @note The first character (or EOF for an empty input) is read and cached immediately,
+     *       so get_current() is valid right after construction.
+     *
+     * @param input_adapter An input adapter object
+     */
+    explicit input_handler(InputAdapterType&& input_adapter)
+        : m_input_adapter(std::move(input_adapter)),
+          m_cur_pos(0)
+    {
+        get_next();
+        m_cur_pos = 0;
+    }
+
+    /**
+     * @brief Get the character at the current position.
+     *
+     * @return int_type A character or EOF.
+     */
+    int_type get_current()
+    {
+        return m_cache[m_cur_pos];
+    }
+
+    /**
+     * @brief Get the character at next position.
+     * @note The cursor is not advanced any more once it has reached EOF.
+     *
+     * @return int_type A character or EOF.
+     */
+    int_type get_next()
+    {
+        // if already cached, return the cached value.
+        if (m_cur_pos + 1 < m_cache.size())
+        {
+            return m_cache[++m_cur_pos];
+        }
+
+        int_type ret = m_input_adapter.get_character();
+
+        if (m_cache.empty())
+        {
+            // the very first read: always cache the value, even if it is EOF (empty input),
+            // so that neither the check below nor get_current() accesses an empty cache.
+            // the cursor stays at the head of the cache.
+            m_cache.push_back(ret);
+            return ret;
+        }
+
+        if (ret != end_of_input || m_cache[m_cur_pos] != end_of_input)
+        {
+            // cache the return value for possible later use.
+            m_cache.push_back(ret);
+            ++m_cur_pos;
+        }
+        return ret;
+    }
+
+    /**
+     * @brief Get the characters in the given range.
+     *
+     * @param length The length of characters retrieved from the current position.
+     * @param str A string which will contain the resulting characters.
+     * @return int_type 0 (for success) or EOF (for error).
+     * @note On error the cursor is restored to where it was and @a str is left empty.
+     */
+    int_type get_range(size_t length, string_type& str)
+    {
+        str.clear();
+
+        if (get_current() == end_of_input)
+        {
+            return end_of_input;
+        }
+
+        // remember where to go back in case the input ends before `length` characters are read.
+        const size_t start_pos = m_cur_pos;
+
+        str += char_traits_type::to_char_type(get_current());
+
+        for (size_t i = 1; i < length; i++)
+        {
+            if (get_next() == end_of_input)
+            {
+                m_cur_pos = start_pos;
+                str.clear();
+                return end_of_input;
+            }
+            str += char_traits_type::to_char_type(get_current());
+        }
+
+        return 0;
+    }
+
+    /**
+     * @brief Move backward the current position.
+     */
+    void unget()
+    {
+        if (m_cur_pos > 0)
+        {
+            // just move back the cursor. (no action for adapter)
+            --m_cur_pos;
+        }
+    }
+
+    /**
+     * @brief Move backward the current position to the given range.
+     * @note The cursor is clamped to the head of the cache.
+     *
+     * @param length The length of moving backward.
+     */
+    void unget_range(size_t length)
+    {
+        m_cur_pos = (m_cur_pos > length) ? m_cur_pos - length : 0;
+    }
+
+    /**
+     * @brief Check if the next character is the expected one.
+     * @note The current position is left unchanged by this function.
+     *
+     * @param expected An expected next character.
+     * @return true The next character is the expected one.
+     * @return false The next character is not the expected one, or the input has ended.
+     */
+    bool test_next_char(char_type expected)
+    {
+        // get_next() does not advance the cursor once it is at EOF, so restore the saved position
+        // instead of blindly decrementing the cursor.
+        const size_t cur_pos_backup = m_cur_pos;
+        const int_type next = get_next();
+        m_cur_pos = cur_pos_backup;
+
+        // EOF is not a character; never let it compare equal to a real one.
+        if (next == end_of_input)
+        {
+            return false;
+        }
+
+        return char_traits_type::eq(char_traits_type::to_char_type(next), expected);
+    }
+
+private:
+    //!< The value of EOF for the target character type.
+    static constexpr int_type end_of_input = char_traits_type::eof();
+
+    //!< An input adapter object.
+    InputAdapterType m_input_adapter;
+    //!< Cached characters retrieved from an input adapter object.
+    std::vector<int_type> m_cache;
+    //!< The current position in an input buffer.
+ size_t m_cur_pos; +}; + +} // namespace detail + +FK_YAML_NAMESPACE_END + +#endif /* FK_YAML_DETAIL_INPUT_INPUT_HANDLER_HPP_ */ \ No newline at end of file diff --git a/include/fkYAML/detail/input/lexical_analyzer.hpp b/include/fkYAML/detail/input/lexical_analyzer.hpp index ea60e98e..4897a419 100644 --- a/include/fkYAML/detail/input/lexical_analyzer.hpp +++ b/include/fkYAML/detail/input/lexical_analyzer.hpp @@ -18,14 +18,17 @@ #include #include #include -#include +#include #include #include #include +#include +#include #include #include #include +#include #include /** @@ -41,34 +44,6 @@ FK_YAML_NAMESPACE_BEGIN namespace detail { -/** - * @enum lexical_token_t - * @brief Definition of lexical token types. - */ -enum class lexical_token_t -{ - END_OF_BUFFER, //!< the end of input buffer. - KEY_SEPARATOR, //!< the key separater `:` - VALUE_SEPARATOR, //!< the value separater `,` - ANCHOR_PREFIX, //!< the character for anchor prefix `&` - ALIAS_PREFIX, //!< the character for alias prefix `*` - COMMENT_PREFIX, //!< the character for comment prefix `#` - YAML_VER_DIRECTIVE, //!< a YAML version directive found. use get_yaml_version() to get a value. - TAG_DIRECTIVE, //!< a TAG directive found. use GetTagInfo() to get the tag information. - INVALID_DIRECTIVE, //!< an invalid directive found. do not try to get the value. - SEQUENCE_BLOCK_PREFIX, //!< the character for sequence block prefix `- ` - SEQUENCE_FLOW_BEGIN, //!< the character for sequence flow begin `[` - SEQUENCE_FLOW_END, //!< the character for sequence flow end `]` - MAPPING_BLOCK_PREFIX, //!< the character for mapping block prefix `:` - MAPPING_FLOW_BEGIN, //!< the character for mapping begin `{` - MAPPING_FLOW_END, //!< the character for mapping end `}` - NULL_VALUE, //!< a null value found. use get_null() to get a value. - BOOLEAN_VALUE, //!< a boolean value found. use get_boolean() to get a value. - INTEGER_VALUE, //!< an integer value found. use get_integer() to get a value. 
- FLOAT_NUMBER_VALUE, //!< a float number value found. use get_float_number() to get a value. - STRING_VALUE, //!< the character for string begin `"` or any character except the above ones -}; - /** * @class lexical_analyzer * @brief A class which lexically analizes YAML formatted inputs. @@ -81,9 +56,11 @@ template < class lexical_analyzer { private: - using char_traits_type = std::char_traits; + using input_handler_type = input_handler; + using char_traits_type = typename input_handler_type::char_traits_type; using char_type = typename char_traits_type::char_type; using char_int_type = typename char_traits_type::int_type; + using input_string_type = typename input_handler_type::string_type; public: using boolean_type = typename BasicNodeType::boolean_type; @@ -113,8 +90,7 @@ class lexical_analyzer * @brief Construct a new lexical_analyzer object. */ explicit lexical_analyzer(InputAdapterType&& input_adapter) - : m_input_adapter(std::move(input_adapter)), - m_last_char(0), + : m_input_handler(std::move(input_adapter)), m_value_buffer(), m_position_info() { @@ -130,105 +106,105 @@ class lexical_analyzer { skip_white_spaces(); - char_int_type current = get_current_character(); + char_int_type current = m_input_handler.get_current(); if (0x00 <= current && current <= 0x7F && isdigit(current)) { - return scan_number(); + return m_last_token_type = scan_number(); } switch (current) { - case '\0': - case char_traits_type::eof(): // end of input buffer + case end_of_input: // end of input buffer return lexical_token_t::END_OF_BUFFER; case ':': // key separater - switch (get_character()) + switch (m_input_handler.get_next()) { case ' ': break; case '\r': - if (!test_next_char('\n')) + if (m_input_handler.get_next() == '\n') { - return lexical_token_t::MAPPING_BLOCK_PREFIX; + m_input_handler.get_next(); } + return m_last_token_type = lexical_token_t::MAPPING_BLOCK_PREFIX; case '\n': - get_character(); - return lexical_token_t::MAPPING_BLOCK_PREFIX; + 
m_input_handler.get_next(); + return m_last_token_type = lexical_token_t::MAPPING_BLOCK_PREFIX; default: throw fkyaml::exception("Half-width spaces or newline codes are required after a key separater(:)."); } - get_character(); - return lexical_token_t::KEY_SEPARATOR; + m_input_handler.get_next(); + return m_last_token_type = lexical_token_t::KEY_SEPARATOR; case ',': // value separater - get_character(); - return lexical_token_t::VALUE_SEPARATOR; + m_input_handler.get_next(); + return m_last_token_type = lexical_token_t::VALUE_SEPARATOR; case '&': { // anchor prefix m_value_buffer.clear(); while (true) { - char_int_type next = get_character(); - if (next == char_traits_type::eof() || next == '\r' || next == '\n') + char_int_type next = m_input_handler.get_next(); + if (next == end_of_input || next == '\r' || next == '\n') { throw fkyaml::exception("An anchor label must be followed by some value."); } if (next == ' ') { - get_character(); + m_input_handler.get_next(); break; } m_value_buffer.push_back(next); } - return lexical_token_t::ANCHOR_PREFIX; + return m_last_token_type = lexical_token_t::ANCHOR_PREFIX; } case '*': { // alias prefix m_value_buffer.clear(); while (true) { - char_int_type next = get_character(); - if (next == ' ' || next == '\r' || next == '\n' || next == char_traits_type::eof()) + char_int_type next = m_input_handler.get_next(); + if (next == ' ' || next == '\r' || next == '\n' || next == end_of_input) { if (m_value_buffer.empty()) { throw fkyaml::exception("An alias prefix must be followed by some anchor name."); } - get_character(); + m_input_handler.get_next(); break; } m_value_buffer.push_back(next); } - return lexical_token_t::ALIAS_PREFIX; + return m_last_token_type = lexical_token_t::ALIAS_PREFIX; } case '#': // comment prefix scan_comment(); - return lexical_token_t::COMMENT_PREFIX; + return m_last_token_type = lexical_token_t::COMMENT_PREFIX; case '%': // directive prefix - return scan_directive(); + return m_last_token_type = 
scan_directive(); case '-': - if (!test_next_char(' ')) + if (!m_input_handler.test_next_char(' ')) { - return scan_number(); + return m_last_token_type = scan_number(); } // update_indent_width(); // Move a cursor to the beginning of the next token. - get_character(); - get_character(); + m_input_handler.get_next(); + m_input_handler.get_next(); - return lexical_token_t::SEQUENCE_BLOCK_PREFIX; + return m_last_token_type = lexical_token_t::SEQUENCE_BLOCK_PREFIX; case '[': // sequence flow begin - get_character(); - return lexical_token_t::SEQUENCE_FLOW_BEGIN; + m_input_handler.get_next(); + return m_last_token_type = lexical_token_t::SEQUENCE_FLOW_BEGIN; case ']': // sequence flow end - get_character(); - return lexical_token_t::SEQUENCE_FLOW_END; + m_input_handler.get_next(); + return m_last_token_type = lexical_token_t::SEQUENCE_FLOW_END; case '{': // mapping flow begin - get_character(); - return lexical_token_t::MAPPING_FLOW_BEGIN; + m_input_handler.get_next(); + return m_last_token_type = lexical_token_t::MAPPING_FLOW_BEGIN; case '}': // mapping flow end - get_character(); - return lexical_token_t::MAPPING_FLOW_END; + m_input_handler.get_next(); + return m_last_token_type = lexical_token_t::MAPPING_FLOW_END; case '@': throw fkyaml::exception("Any token cannot start with at(@). It is a reserved indicator for YAML."); case '`': @@ -236,106 +212,104 @@ class lexical_analyzer "Any token cannot start with grave accent(`). 
It is a reserved indicator for YAML."); case '\"': case '\'': - return scan_string(); + return m_last_token_type = scan_string(); case '~': m_value_buffer = current; - return lexical_token_t::NULL_VALUE; + return m_last_token_type = lexical_token_t::NULL_VALUE; case '+': - return scan_number(); + return m_last_token_type = scan_number(); case '.': { - if (!get_string_from_input(4, m_value_buffer)) - { - return scan_string(); - } - - if (m_value_buffer == ".inf" || m_value_buffer == ".Inf" || m_value_buffer == ".INF") + if (m_input_handler.get_range(4, m_value_buffer) == end_of_input) { - get_character(); - return lexical_token_t::FLOAT_NUMBER_VALUE; + return m_last_token_type = scan_string(); } - if (m_value_buffer == ".nan" || m_value_buffer == ".NaN" || m_value_buffer == ".NAN") + try { - get_character(); - return lexical_token_t::FLOAT_NUMBER_VALUE; + // try convert to an infinite/nan value. + m_float_val = from_string(m_value_buffer, type_tag {}); + m_input_handler.get_next(); + return m_last_token_type = lexical_token_t::FLOAT_NUMBER_VALUE; } - - for (int i = 0; i < 3; i++) + catch (const fkyaml::exception& /*unused*/) { - unget_character(); + // revert change in the position to the one before comparison above. + m_input_handler.unget_range(3); + return m_last_token_type = scan_string(); } - m_last_char = current; - return scan_string(); } case 'F': case 'f': { // YAML specifies that only these words represent the boolean value `false`. // See "10.3.2. Tag Resolution" section in https://yaml.org/spec/1.2.2/ - if (!get_string_from_input(5, m_value_buffer)) + if (m_input_handler.get_range(5, m_value_buffer) == end_of_input) { - return scan_string(); + return m_last_token_type = scan_string(); } - if (m_value_buffer == "false" || m_value_buffer == "False" || m_value_buffer == "FALSE") + try { - get_character(); - return lexical_token_t::BOOLEAN_VALUE; + // try convert to a boolean false value. 
+ m_boolean_val = from_string(m_value_buffer, type_tag {}); + m_input_handler.get_next(); + return m_last_token_type = lexical_token_t::BOOLEAN_VALUE; } - - for (int i = 0; i < 4; i++) + catch (const fkyaml::exception& /*unused*/) { - unget_character(); + // revert change in the position to the one before comparison above. + m_input_handler.unget_range(4); + return m_last_token_type = scan_string(); } - m_last_char = current; - return scan_string(); } case 'N': case 'n': { // YAML specifies that these words and a tilde represent a null value. // Tildes are already checked above, so no check is needed here. // See "10.3.2. Tag Resolution" section in https://yaml.org/spec/1.2.2/ - if (!get_string_from_input(4, m_value_buffer)) + if (m_input_handler.get_range(4, m_value_buffer) == end_of_input) { - return scan_string(); + return m_last_token_type = scan_string(); } - if (m_value_buffer == "null" || m_value_buffer == "Null" || m_value_buffer == "NULL") + try { - get_character(); - return lexical_token_t::NULL_VALUE; + // try convert to a null value. + from_string(m_value_buffer, type_tag {}); + m_input_handler.get_next(); + return m_last_token_type = lexical_token_t::NULL_VALUE; } - - for (int i = 0; i < 3; i++) + catch (const fkyaml::exception& /*unused*/) { - unget_character(); + // revert change in the position to the one before comparison above. + m_input_handler.unget_range(3); + return m_last_token_type = scan_string(); } - m_last_char = current; - return scan_string(); } case 'T': case 't': { // YAML specifies that only these words represent the boolean value `true`. // See "10.3.2. 
Tag Resolution" section in https://yaml.org/spec/1.2.2/ - if (!get_string_from_input(4, m_value_buffer)) + if (m_input_handler.get_range(4, m_value_buffer) == end_of_input) { - return scan_string(); + return m_last_token_type = scan_string(); } - if (m_value_buffer == "true" || m_value_buffer == "True" || m_value_buffer == "TRUE") + try { - get_character(); - return lexical_token_t::BOOLEAN_VALUE; + // try convert to a boolean true value. + m_boolean_val = from_string(m_value_buffer, type_tag {}); + m_input_handler.get_next(); + return m_last_token_type = lexical_token_t::BOOLEAN_VALUE; } - - for (int i = 0; i < 3; i++) + catch (const fkyaml::exception& /*unused*/) { - unget_character(); + // revert change in the position to the one before comparison above. + m_input_handler.unget_range(3); + return m_last_token_type = scan_string(); } - m_last_char = current; - return scan_string(); } default: - return scan_string(); + return m_last_token_type = scan_string(); } } @@ -346,13 +320,10 @@ class lexical_analyzer */ std::nullptr_t get_null() const { - FK_YAML_ASSERT(!m_value_buffer.empty()); - - if (m_value_buffer == "null" || m_value_buffer == "Null" || m_value_buffer == "NULL" || m_value_buffer == "~") + if (m_last_token_type == lexical_token_t::NULL_VALUE) { return nullptr; } - throw fkyaml::exception("Invalid request for a null value."); } @@ -364,18 +335,10 @@ class lexical_analyzer */ boolean_type get_boolean() const { - FK_YAML_ASSERT(!m_value_buffer.empty()); - - if (m_value_buffer == "true" || m_value_buffer == "True" || m_value_buffer == "TRUE") - { - return static_cast(true); - } - - if (m_value_buffer == "false" || m_value_buffer == "False" || m_value_buffer == "FALSE") + if (m_last_token_type == lexical_token_t::BOOLEAN_VALUE) { - return static_cast(false); + return m_boolean_val; } - throw fkyaml::exception("Invalid request for a boolean value."); } @@ -386,14 +349,11 @@ class lexical_analyzer */ integer_type get_integer() const { - 
FK_YAML_ASSERT(!m_value_buffer.empty()); - - char* endptr = nullptr; - const auto tmp_val = std::strtoll(m_value_buffer.data(), &endptr, 0); - - FK_YAML_ASSERT(endptr == m_value_buffer.data() + m_value_buffer.size()); - - return static_cast(tmp_val); + if (m_last_token_type == lexical_token_t::INTEGER_VALUE) + { + return m_integer_val; + } + throw fkyaml::exception("Invalid request for an integer value."); } /** @@ -403,30 +363,11 @@ class lexical_analyzer */ float_number_type get_float_number() const { - FK_YAML_ASSERT(!m_value_buffer.empty()); - - if (m_value_buffer == ".inf" || m_value_buffer == ".Inf" || m_value_buffer == ".INF") - { - return std::numeric_limits::infinity(); - } - - if (m_value_buffer == "-.inf" || m_value_buffer == "-.Inf" || m_value_buffer == "-.INF") + if (m_last_token_type == lexical_token_t::FLOAT_NUMBER_VALUE) { - static_assert(std::numeric_limits::is_iec559, "IEEE 754 required."); - return -1 * std::numeric_limits::infinity(); + return m_float_val; } - - if (m_value_buffer == ".nan" || m_value_buffer == ".NaN" || m_value_buffer == ".NAN") - { - return std::nan(""); - } - - char* endptr = nullptr; - const double value = std::strtod(m_value_buffer.data(), &endptr); - - FK_YAML_ASSERT(endptr == m_value_buffer.data() + m_value_buffer.size()); - - return static_cast(value); + throw fkyaml::exception("Invalid request for a float number value."); } /** @@ -436,6 +377,8 @@ class lexical_analyzer */ const string_type& get_string() const noexcept { + // TODO: Provide support for different string types between nodes & inputs. 
+ static_assert(std::is_same::value, "Unsupported, different string types."); return m_value_buffer; } @@ -499,7 +442,7 @@ class lexical_analyzer */ lexical_token_t scan_comment() { - FK_YAML_ASSERT(get_current_character() == '#'); + FK_YAML_ASSERT(m_input_handler.get_current() == '#'); skip_until_line_end(); return lexical_token_t::COMMENT_PREFIX; @@ -513,19 +456,19 @@ class lexical_analyzer */ lexical_token_t scan_directive() { - FK_YAML_ASSERT(get_current_character() == '%'); + FK_YAML_ASSERT(m_input_handler.get_current() == '%'); - switch (get_character()) + switch (m_input_handler.get_next()) { - case char_traits_type::eof(): + case end_of_input: throw fkyaml::exception("invalid eof in a directive."); case 'T': { - if (get_character() != 'A' || get_character() != 'G') + if (m_input_handler.get_next() != 'A' || m_input_handler.get_next() != 'G') { skip_until_line_end(); return lexical_token_t::INVALID_DIRECTIVE; } - if (get_character() != ' ') + if (m_input_handler.get_next() != ' ') { throw fkyaml::exception("There must be a half-width space between \"%TAG\" and tag info."); } @@ -533,12 +476,13 @@ class lexical_analyzer return lexical_token_t::TAG_DIRECTIVE; } case 'Y': - if (get_character() != 'A' || get_character() != 'M' || get_character() != 'L') + if (m_input_handler.get_next() != 'A' || m_input_handler.get_next() != 'M' || + m_input_handler.get_next() != 'L') { skip_until_line_end(); return lexical_token_t::INVALID_DIRECTIVE; } - if (get_character() != ' ') + if (m_input_handler.get_next() != ' ') { throw fkyaml::exception("There must be a half-width space between \"%YAML\" and a version number."); } @@ -559,23 +503,23 @@ class lexical_analyzer { m_value_buffer.clear(); - if (get_character() != '1') + if (m_input_handler.get_next() != '1') { throw fkyaml::exception("Invalid YAML major version found."); } - m_value_buffer.push_back(get_current_character()); + m_value_buffer.push_back(m_input_handler.get_current()); - if (get_character() != '.') + if 
(m_input_handler.get_next() != '.') { throw fkyaml::exception("A period must be followed after the YAML major version."); } - m_value_buffer.push_back(get_current_character()); + m_value_buffer.push_back(m_input_handler.get_current()); - switch (get_character()) + switch (m_input_handler.get_next()) { case '1': case '2': - m_value_buffer.push_back(get_current_character()); + m_value_buffer.push_back(m_input_handler.get_current()); break; case '0': case '3': @@ -590,7 +534,8 @@ class lexical_analyzer throw fkyaml::exception("YAML version must be specified with digits and periods."); } - if (get_character() != ' ' && get_current_character() != '\r' && get_current_character() != '\n') + if (m_input_handler.get_next() != ' ' && m_input_handler.get_current() != '\r' && + m_input_handler.get_current() != '\n') { throw fkyaml::exception("Only YAML version 1.1/1.2 are supported."); } @@ -608,19 +553,23 @@ class lexical_analyzer { m_value_buffer.clear(); - char_int_type current = get_current_character(); + char_int_type current = m_input_handler.get_current(); FK_YAML_ASSERT(std::isdigit(current) || current == '-' || current == '+'); + lexical_token_t ret = lexical_token_t::END_OF_BUFFER; switch (current) { case '-': m_value_buffer.push_back(current); - return scan_negative_number(); + ret = scan_negative_number(); + break; case '+': - return scan_decimal_number(); + ret = scan_decimal_number(); + break; case '0': m_value_buffer.push_back(current); - return scan_number_after_zero_at_first(); + ret = scan_number_after_zero_at_first(); + break; case '1': case '2': case '3': @@ -631,10 +580,25 @@ class lexical_analyzer case '8': case '9': m_value_buffer.push_back(current); - return scan_decimal_number(); + ret = scan_decimal_number(); + break; default: // LCOV_EXCL_LINE throw fkyaml::exception("Invalid character found in a number token."); // LCOV_EXCL_LINE } + + switch (ret) + { + case lexical_token_t::INTEGER_VALUE: + m_integer_val = from_string(m_value_buffer, type_tag {}); 
+            break;
+        case lexical_token_t::FLOAT_NUMBER_VALUE:
+            m_float_val = from_string(m_value_buffer, type_tag<float_number_type> {});
+            break;
+        default:   // LCOV_EXCL_LINE
+            break; // LCOV_EXCL_LINE
+        }
+
+        return ret;
     }
 
     /**
@@ -644,24 +608,36 @@ class lexical_analyzer
      */
     lexical_token_t scan_negative_number()
     {
-        char_int_type next = get_character();
+        char_int_type next = m_input_handler.get_next();
         FK_YAML_ASSERT(std::isdigit(next) || next == '.');
 
         if (std::isdigit(next))
         {
             m_value_buffer.push_back(next);
-            const lexical_token_t ret = scan_decimal_number();
-            return (ret == lexical_token_t::FLOAT_NUMBER_VALUE) ? ret : lexical_token_t::INTEGER_VALUE;
+            return scan_decimal_number();
         }
 
-        if (get_string_from_input(4, m_value_buffer))
+        // get_range() clears its output string, so read into a temporary one
+        // to keep the minus sign which scan_number() has already stored in m_value_buffer.
+        input_string_type tmp_str;
+        if (m_input_handler.get_range(4, tmp_str) != end_of_input)
         {
-            if (m_value_buffer == ".inf" || m_value_buffer == ".Inf" || m_value_buffer == ".INF")
+            // append to the sign so that "-.inf" (not ".inf") is what gets converted by the caller.
+            m_value_buffer += tmp_str;
+
+            try
             {
-                get_character();
+                // check if convertible to an infinite value.
+                from_string(m_value_buffer, type_tag<float_number_type> {});
+                m_input_handler.get_next();
                 return lexical_token_t::FLOAT_NUMBER_VALUE;
             }
+            catch (const fkyaml::exception& /*unused*/)
+            {
+                // handle this error below.
+            }
         }
+
         throw fkyaml::exception("Invalid character found in a negative number token."); // LCOV_EXCL_LINE
     }
 
@@ -672,7 +642,7 @@ class lexical_analyzer
      */
     lexical_token_t scan_number_after_zero_at_first()
     {
-        char_int_type next = get_character();
+        char_int_type next = m_input_handler.get_next();
         switch (next)
         {
         case '.':
@@ -698,8 +668,7 @@ class lexical_analyzer
      */
     lexical_token_t scan_decimal_number_after_decimal_point()
     {
-        char_int_type next = get_character();
-        FK_YAML_ASSERT(std::isdigit(next));
+        char_int_type next = m_input_handler.get_next();
 
         if (std::isdigit(next))
         {
@@ -718,7 +687,7 @@ class lexical_analyzer
      */
     lexical_token_t scan_decimal_number_after_exponent()
     {
-        char_int_type next = get_character();
+        char_int_type next = m_input_handler.get_next();
         if (next == '+' || next == '-')
         {
             m_value_buffer.push_back(next);
@@ -739,8 +708,7 @@ class lexical_analyzer
      */
     lexical_token_t scan_decimal_number_after_sign()
     {
-        char_int_type next = get_character();
-        FK_YAML_ASSERT(std::isdigit(next));
+        char_int_type next = m_input_handler.get_next();
 
         if (std::isdigit(next))
         {
@@ -758,7 +726,7 @@ class lexical_analyzer
      */
     lexical_token_t scan_decimal_number()
     {
-        char_int_type next = get_character();
+        char_int_type next = m_input_handler.get_next();
 
         if (std::isdigit(next))
         {
@@ -768,7 +736,7 @@ class lexical_analyzer
 
         if (next == '.')
         {
-            if (m_value_buffer.find(next) != std::string::npos) // NOLINT(abseil-string-find-str-contains)
+            if (m_value_buffer.find(next) != string_type::npos) // NOLINT(abseil-string-find-str-contains)
             {
                 // TODO: support this use case (e.g.
version info like 1.0.0) throw fkyaml::exception("Multiple decimal points found in a token."); @@ -793,7 +761,7 @@ class lexical_analyzer */ lexical_token_t scan_octal_number() { - char_int_type next = get_character(); + char_int_type next = m_input_handler.get_next(); if ('0' <= next && next <= '7') { m_value_buffer.push_back(next); @@ -809,7 +777,7 @@ class lexical_analyzer */ lexical_token_t scan_hexadecimal_number() { - char_int_type next = get_character(); + char_int_type next = m_input_handler.get_next(); if (std::isxdigit(next)) { m_value_buffer.push_back(next); @@ -828,16 +796,16 @@ class lexical_analyzer { m_value_buffer.clear(); - const bool needs_last_double_quote = (get_current_character() == '\"'); - const bool needs_last_single_quote = (get_current_character() == '\''); + const bool needs_last_double_quote = (m_input_handler.get_current() == '\"'); + const bool needs_last_single_quote = (m_input_handler.get_current() == '\''); size_t start_pos_backup = m_position_info.total_read_char_counts; - char_int_type current = - (needs_last_double_quote || needs_last_single_quote) ? get_character() : get_current_character(); + char_int_type current = (needs_last_double_quote || needs_last_single_quote) ? m_input_handler.get_next() + : m_input_handler.get_current(); - for (;; current = get_character()) + for (;; current = m_input_handler.get_next()) { // Handle the end of input buffer. - if (current == char_traits_type::eof()) + if (current == end_of_input) { if (needs_last_double_quote) { @@ -871,7 +839,7 @@ class lexical_analyzer { if (needs_last_double_quote) { - get_character(); + m_input_handler.get_next(); return lexical_token_t::STRING_VALUE; } @@ -895,9 +863,9 @@ class lexical_analyzer // If single quotation marks are repeated twice in a single-quoted string token. they are considered as // an escaped single quotation mark. 
- if (test_next_char('\'')) + if (m_input_handler.test_next_char('\'')) { - m_value_buffer.push_back(get_character()); + m_value_buffer.push_back(m_input_handler.get_next()); continue; } @@ -914,9 +882,8 @@ class lexical_analyzer continue; } - char_int_type next = get_character(); - unget_character(); - m_last_char = current; + char_int_type next = m_input_handler.get_next(); + m_input_handler.unget(); // A colon as a key separator must be followed by a space or a newline code. if (next != ' ' && next != '\r' && next != '\n') @@ -1002,7 +969,7 @@ class lexical_analyzer throw fkyaml::exception("Escaped characters are only available in a double-quoted string token."); } - current = get_character(); + current = m_input_handler.get_next(); switch (current) { case 'a': @@ -1045,14 +1012,15 @@ class lexical_analyzer char byte = 0; for (int i = 1; i >= 0; --i) { - char four_bits = convert_hex_char_to_byte(char_traits_type::to_char_type(get_character())); + char four_bits = + convert_hex_char_to_byte(char_traits_type::to_char_type(m_input_handler.get_next())); // NOLINTNEXTLINE(bugprone-narrowing-conversions,cppcoreguidelines-narrowing-conversions) byte |= static_cast(four_bits << (4 * i)); } m_value_buffer.push_back(byte); break; } - // Multibyte characters are currently unsupported. + // TODO: Multibyte characters are not yet supported. // Thus \N, \_, \L, \P \uXX, \UXXXX are currently unavailable. default: throw fkyaml::exception("Unsupported escape sequence found in a string token."); @@ -1060,164 +1028,99 @@ class lexical_analyzer continue; } - // Handle ASCII characters except control characters. - if (0x20 <= current && current <= 0x7E) + // Handle unescaped control characters. + if (0x00 <= current && current <= 0x1F) { - m_value_buffer.push_back(current); + handle_unescaped_control_char(current); continue; } - // Handle unescaped control characters. - switch (current) + // Handle ASCII characters except control characters. 
+ if (0x20 <= current && current <= 0x7E) { - // 0x00(NULL) has already been handled above. - case 0x01: - throw fkyaml::exception("Control character U+0001 (SOH) must be escaped to \\u0001."); - case 0x02: - throw fkyaml::exception("Control character U+0002 (STX) must be escaped to \\u0002."); - case 0x03: - throw fkyaml::exception("Control character U+0003 (ETX) must be escaped to \\u0003."); - case 0x04: - throw fkyaml::exception("Control character U+0004 (EOT) must be escaped to \\u0004."); - case 0x05: - throw fkyaml::exception("Control character U+0005 (ENQ) must be escaped to \\u0005."); - case 0x06: - throw fkyaml::exception("Control character U+0006 (ACK) must be escaped to \\u0006."); - case 0x07: - throw fkyaml::exception("Control character U+0007 (BEL) must be escaped to \\a or \\u0007."); - case 0x08: - throw fkyaml::exception("Control character U+0008 (BS) must be escaped to \\b or \\u0008."); - case 0x09: // HT m_value_buffer.push_back(current); - break; - // 0x0A(LF) has already been handled above. - case 0x0B: - throw fkyaml::exception("Control character U+000B (VT) must be escaped to \\v or \\u000B."); - case 0x0C: - throw fkyaml::exception("Control character U+000C (FF) must be escaped to \\f or \\u000C."); - // 0x0D(CR) has already been handled above. 
- case 0x0E: - throw fkyaml::exception("Control character U+000E (SO) must be escaped to \\u000E."); - case 0x0F: - throw fkyaml::exception("Control character U+000F (SI) must be escaped to \\u000F."); - case 0x10: - throw fkyaml::exception("Control character U+0010 (DLE) must be escaped to \\u0010."); - case 0x11: - throw fkyaml::exception("Control character U+0011 (DC1) must be escaped to \\u0011."); - case 0x12: - throw fkyaml::exception("Control character U+0012 (DC2) must be escaped to \\u0012."); - case 0x13: - throw fkyaml::exception("Control character U+0013 (DC3) must be escaped to \\u0013."); - case 0x14: - throw fkyaml::exception("Control character U+0014 (DC4) must be escaped to \\u0014."); - case 0x15: - throw fkyaml::exception("Control character U+0015 (NAK) must be escaped to \\u0015."); - case 0x16: - throw fkyaml::exception("Control character U+0016 (SYN) must be escaped to \\u0016."); - case 0x17: - throw fkyaml::exception("Control character U+0017 (ETB) must be escaped to \\u0017."); - case 0x18: - throw fkyaml::exception("Control character U+0018 (CAN) must be escaped to \\u0018."); - case 0x19: - throw fkyaml::exception("Control character U+0019 (EM) must be escaped to \\u0019."); - case 0x1A: - throw fkyaml::exception("Control character U+001A (SUB) must be escaped to \\u001A."); - case 0x1B: - throw fkyaml::exception("Control character U+001B (ESC) must be escaped to \\e or \\u001B."); - case 0x1C: - throw fkyaml::exception("Control character U+001C (FS) must be escaped to \\u001C."); - case 0x1D: - throw fkyaml::exception("Control character U+001D (GS) must be escaped to \\u001D."); - case 0x1E: - throw fkyaml::exception("Control character U+001E (RS) must be escaped to \\u001E."); - case 0x1F: - throw fkyaml::exception("Control character U+001F (US) must be escaped to \\u001F."); - // 0x20('0')~0x7E('~') have already been handled above. - // 16bit, 32bit characters are currently not supported. 
- default: - throw fkyaml::exception("Unsupported multibytes character found."); + continue; } - } - } - /** - * @brief Update position by moving backward. - */ - void unget_character() noexcept - { - m_input_adapter.unget_character(); - } - - /** - * @brief Get current character from the input buffer without position updates. - * - * @return char_int_type The next character. - */ - char_int_type get_current_character() noexcept - { - if (m_is_first_input_char) - { - m_last_char = get_character(); + // TODO: multibyte characters are not yet supported. + throw fkyaml::exception("Unsupported multibytes character found."); } - return m_last_char; - } - - /** - * @brief Get the next character from the input buffer with position updates. - * - * @return char_int_type Constant reference to the next character. - */ - char_int_type get_character() noexcept - { - m_is_first_input_char = false; - m_last_char = m_input_adapter.get_character(); - return m_last_char; } /** - * @brief Get the string from input object. + * @brief Handle unescaped control characters. * - * @param count The number of characters. - * @param str A container of a resulting string. - * @return true Succeeded in getting strings. - * @return false Failed to get strings. + * @param c A target character. */ - bool get_string_from_input(const int count, std::string& str) noexcept + void handle_unescaped_control_char(char_int_type c) { - str.clear(); - char_int_type backup = m_last_char; - str += m_last_char; + FK_YAML_ASSERT(0x00 <= c && c <= 0x1F); - for (int i = 1; i < count; i++) + switch (c) { - if (get_character() == char_traits_type::eof()) - { - for (int j = i; j > 1; j--) - { - unget_character(); - } - m_last_char = backup; - str.clear(); - return false; - } - str += m_last_char; + // 0x00(NULL) has already been handled above. 
+ case 0x01: + throw fkyaml::exception("Control character U+0001 (SOH) must be escaped to \\u0001."); + case 0x02: + throw fkyaml::exception("Control character U+0002 (STX) must be escaped to \\u0002."); + case 0x03: + throw fkyaml::exception("Control character U+0003 (ETX) must be escaped to \\u0003."); + case 0x04: + throw fkyaml::exception("Control character U+0004 (EOT) must be escaped to \\u0004."); + case 0x05: + throw fkyaml::exception("Control character U+0005 (ENQ) must be escaped to \\u0005."); + case 0x06: + throw fkyaml::exception("Control character U+0006 (ACK) must be escaped to \\u0006."); + case 0x07: + throw fkyaml::exception("Control character U+0007 (BEL) must be escaped to \\a or \\u0007."); + case 0x08: + throw fkyaml::exception("Control character U+0008 (BS) must be escaped to \\b or \\u0008."); + case 0x09: // HT + m_value_buffer.push_back(c); + break; + // 0x0A(LF) has already been handled above. + case 0x0B: + throw fkyaml::exception("Control character U+000B (VT) must be escaped to \\v or \\u000B."); + case 0x0C: + throw fkyaml::exception("Control character U+000C (FF) must be escaped to \\f or \\u000C."); + // 0x0D(CR) has already been handled above. 
+ case 0x0E: + throw fkyaml::exception("Control character U+000E (SO) must be escaped to \\u000E."); + case 0x0F: + throw fkyaml::exception("Control character U+000F (SI) must be escaped to \\u000F."); + case 0x10: + throw fkyaml::exception("Control character U+0010 (DLE) must be escaped to \\u0010."); + case 0x11: + throw fkyaml::exception("Control character U+0011 (DC1) must be escaped to \\u0011."); + case 0x12: + throw fkyaml::exception("Control character U+0012 (DC2) must be escaped to \\u0012."); + case 0x13: + throw fkyaml::exception("Control character U+0013 (DC3) must be escaped to \\u0013."); + case 0x14: + throw fkyaml::exception("Control character U+0014 (DC4) must be escaped to \\u0014."); + case 0x15: + throw fkyaml::exception("Control character U+0015 (NAK) must be escaped to \\u0015."); + case 0x16: + throw fkyaml::exception("Control character U+0016 (SYN) must be escaped to \\u0016."); + case 0x17: + throw fkyaml::exception("Control character U+0017 (ETB) must be escaped to \\u0017."); + case 0x18: + throw fkyaml::exception("Control character U+0018 (CAN) must be escaped to \\u0018."); + case 0x19: + throw fkyaml::exception("Control character U+0019 (EM) must be escaped to \\u0019."); + case 0x1A: + throw fkyaml::exception("Control character U+001A (SUB) must be escaped to \\u001A."); + case 0x1B: + throw fkyaml::exception("Control character U+001B (ESC) must be escaped to \\e or \\u001B."); + case 0x1C: + throw fkyaml::exception("Control character U+001C (FS) must be escaped to \\u001C."); + case 0x1D: + throw fkyaml::exception("Control character U+001D (GS) must be escaped to \\u001D."); + case 0x1E: + throw fkyaml::exception("Control character U+001E (RS) must be escaped to \\u001E."); + case 0x1F: + throw fkyaml::exception("Control character U+001F (US) must be escaped to \\u001F."); } - - return true; - } - - /** - * @brief Check if the next character is the expected one. - * - * @param expected_char An expected next character. 
- * @return true The next character is the expected one. - * @return false The next character is not the expected one. - */ - bool test_next_char(char_int_type expected_char) noexcept - { - char_int_type next = m_input_adapter.get_character(); - unget_character(); - return next == expected_char; } /** @@ -1227,7 +1130,7 @@ class lexical_analyzer { while (true) { - switch (get_current_character()) + switch (m_input_handler.get_current()) { case ' ': case '\t': @@ -1237,7 +1140,7 @@ class lexical_analyzer default: return; } - get_character(); + m_input_handler.get_next(); } } @@ -1248,23 +1151,23 @@ class lexical_analyzer { while (true) { - switch (get_current_character()) + switch (m_input_handler.get_current()) { - case char_traits_type::eof(): + case end_of_input: return; case '\r': - if (get_character() == '\n') + if (m_input_handler.get_next() == '\n') { - get_character(); + m_input_handler.get_next(); } return; case '\n': - get_character(); + m_input_handler.get_next(); return; default: break; } - get_character(); + m_input_handler.get_next(); } } @@ -1327,18 +1230,25 @@ class lexical_analyzer } private: + //!< The value of EOF for the target characters. + static constexpr char_int_type end_of_input = char_traits_type::eof(); + //!< An input buffer adapter to be analyzed. - InputAdapterType m_input_adapter; - //!< The most recent value read from the input buffer. - char_int_type m_last_char; - //!< A flag to determine whether the first input char has been retrieved. - bool m_is_first_input_char {true}; + input_handler_type m_input_handler; //!< A temporal buffer to store a string to be parsed to an actual datum. - string_type m_value_buffer; + input_string_type m_value_buffer; //!< The information set for input buffer. position m_position_info; //!< A stack to store indent width history. std::vector m_indent_width_stack; + //!< The last found token type. + lexical_token_t m_last_token_type; + //!< A temporal bool holder. 
+ boolean_type m_boolean_val; + //!< A temporal integer holder. + integer_type m_integer_val; + //!< A temporal floating point number holder. + float_number_type m_float_val; }; } // namespace detail diff --git a/include/fkYAML/detail/meta/type_traits.hpp b/include/fkYAML/detail/meta/type_traits.hpp index e79d1387..4974e7f2 100644 --- a/include/fkYAML/detail/meta/type_traits.hpp +++ b/include/fkYAML/detail/meta/type_traits.hpp @@ -199,6 +199,17 @@ template constexpr T static_const::value; #endif +/** + * @brief A helper structure for tag dispatch. + * + * @tparam T A tag type. + */ +template +struct type_tag +{ + using type = T; +}; + } // namespace detail FK_YAML_NAMESPACE_END diff --git a/include/fkYAML/detail/types/lexical_token_t.hpp b/include/fkYAML/detail/types/lexical_token_t.hpp new file mode 100644 index 00000000..a22653cc --- /dev/null +++ b/include/fkYAML/detail/types/lexical_token_t.hpp @@ -0,0 +1,63 @@ +/** + * _______ __ __ __ _____ __ __ __ + * | __| |_/ | \_/ |/ _ \ / \/ \| | fkYAML: A C++ header-only YAML library + * | __| _ < \_ _/| ___ | _ | |___ version 0.1.1 + * |__| |_| \__| |_| |_| |_|___||___|______| https://github.com/fktn-k/fkYAML + * + * SPDX-FileCopyrightText: 2023 Kensuke Fukutani + * SPDX-License-Identifier: MIT + * + * @file + */ + +#ifndef FK_YAML_DETAIL_TYPES_LEXICAL_TOKEN_T_HPP_ +#define FK_YAML_DETAIL_TYPES_LEXICAL_TOKEN_T_HPP_ + +#include + +/** + * @namespace fkyaml + * @brief namespace for fkYAML library. + */ +FK_YAML_NAMESPACE_BEGIN + +/** + * @namespace detail + * @brief namespace for internal implementations of fkYAML library. + */ +namespace detail +{ + +/** + * @enum lexical_token_t + * @brief Definition of lexical token types. + */ +enum class lexical_token_t +{ + END_OF_BUFFER, //!< the end of input buffer. 
+ KEY_SEPARATOR, //!< the key separator `:` + VALUE_SEPARATOR, //!< the value separator `,` + ANCHOR_PREFIX, //!< the character for anchor prefix `&` + ALIAS_PREFIX, //!< the character for alias prefix `*` + COMMENT_PREFIX, //!< the character for comment prefix `#` + YAML_VER_DIRECTIVE, //!< a YAML version directive found. use get_yaml_version() to get a value. + TAG_DIRECTIVE, //!< a TAG directive found. use GetTagInfo() to get the tag information. + INVALID_DIRECTIVE, //!< an invalid directive found. do not try to get the value. + SEQUENCE_BLOCK_PREFIX, //!< the character for sequence block prefix `- ` + SEQUENCE_FLOW_BEGIN, //!< the character for sequence flow begin `[` + SEQUENCE_FLOW_END, //!< the character for sequence flow end `]` + MAPPING_BLOCK_PREFIX, //!< the character for mapping block prefix `:` + MAPPING_FLOW_BEGIN, //!< the character for mapping begin `{` + MAPPING_FLOW_END, //!< the character for mapping end `}` + NULL_VALUE, //!< a null value found. use get_null() to get a value. + BOOLEAN_VALUE, //!< a boolean value found. use get_boolean() to get a value. + INTEGER_VALUE, //!< an integer value found. use get_integer() to get a value. + FLOAT_NUMBER_VALUE, //!< a float number value found. use get_float_number() to get a value. 
+ STRING_VALUE, //!< the character for string begin `"` or any character except the above ones +}; + +} // namespace detail + +FK_YAML_NAMESPACE_END + +#endif /* FK_YAML_DETAIL_TYPES_LEXICAL_TOKEN_T_HPP_ */ \ No newline at end of file diff --git a/test/unit_test/CMakeLists.txt b/test/unit_test/CMakeLists.txt index d0e3ce34..6ed45eb2 100644 --- a/test/unit_test/CMakeLists.txt +++ b/test/unit_test/CMakeLists.txt @@ -58,6 +58,7 @@ add_executable( ${TEST_TARGET} test_deserializer_class.cpp test_exception_class.cpp + test_from_string.cpp test_iterator_class.cpp test_lexical_analyzer_class.cpp test_node_class.cpp diff --git a/test/unit_test/test_from_string.cpp b/test/unit_test/test_from_string.cpp new file mode 100644 index 00000000..ba1c6905 --- /dev/null +++ b/test/unit_test/test_from_string.cpp @@ -0,0 +1,223 @@ +// _______ __ __ __ _____ __ __ __ +// | __| |_/ | \_/ |/ _ \ / \/ \| | fkYAML: A C++ header-only YAML library (supporting code) +// | __| _ < \_ _/| ___ | _ | |___ version 0.1.1 +// |__| |_| \__| |_| |_| |_|___||___|______| https://github.com/fktn-k/fkYAML +// +// SPDX-FileCopyrightText: 2023 Kensuke Fukutani +// SPDX-License-Identifier: MIT + +#include +#include +#include + +#include + +#include + +TEST_CASE("FromStringTest_NullptrTest", "[FromStringTest]") +{ + SECTION("nothrow expected tests") + { + auto input = GENERATE(std::string("null"), std::string("Null"), std::string("NULL"), std::string("~")); + REQUIRE(fkyaml::detail::from_string(input, fkyaml::detail::type_tag {}) == nullptr); + } + + SECTION("nothrow unexpected test") + { + std::string input("test"); + REQUIRE_THROWS_AS( + fkyaml::detail::from_string(input, fkyaml::detail::type_tag {}), fkyaml::exception); + } +} + +TEST_CASE("FromStringTest_BoolTest", "[FromStringTest]") +{ + SECTION("true value expected tests") + { + auto input = GENERATE(std::string("true"), std::string("True"), std::string("TRUE")); + REQUIRE(fkyaml::detail::from_string(input, fkyaml::detail::type_tag {}) == true); + } + + 
SECTION("false value expected tests") + { + auto input = GENERATE(std::string("false"), std::string("False"), std::string("FALSE")); + REQUIRE(fkyaml::detail::from_string(input, fkyaml::detail::type_tag {}) == false); + } + + SECTION("nothrow unexpected test") + { + std::string input("test"); + REQUIRE_THROWS_AS(fkyaml::detail::from_string(input, fkyaml::detail::type_tag {}), fkyaml::exception); + } +} + +TEST_CASE("FromStringTest_IntegerTest", "[FromStringTest]") +{ + SECTION("char type tests") + { + std::string input("-64"); + REQUIRE(fkyaml::detail::from_string(input, fkyaml::detail::type_tag {}) == -64); + + input = "256"; + REQUIRE_THROWS_AS(fkyaml::detail::from_string(input, fkyaml::detail::type_tag {}), fkyaml::exception); + } + + SECTION("unsigned char type tests") + { + std::string input("64"); + REQUIRE(fkyaml::detail::from_string(input, fkyaml::detail::type_tag {}) == 64); + + input = "512"; + REQUIRE_THROWS_AS( + fkyaml::detail::from_string(input, fkyaml::detail::type_tag {}), fkyaml::exception); + } + + SECTION("short type tests") + { + std::string input("-15464"); + REQUIRE(fkyaml::detail::from_string(input, fkyaml::detail::type_tag {}) == -15464); + + input = "45464"; + REQUIRE_THROWS_AS(fkyaml::detail::from_string(input, fkyaml::detail::type_tag {}), fkyaml::exception); + } + + SECTION("unsigned short type tests") + { + std::string input("15464"); + REQUIRE(fkyaml::detail::from_string(input, fkyaml::detail::type_tag {}) == 15464); + + input = "-1"; + REQUIRE_THROWS_AS( + fkyaml::detail::from_string(input, fkyaml::detail::type_tag {}), fkyaml::exception); + } + + SECTION("int type tests") + { + std::string input("-1154357464"); + REQUIRE(fkyaml::detail::from_string(input, fkyaml::detail::type_tag {}) == -1154357464); + + input = "3154357464"; + REQUIRE_THROWS_AS(fkyaml::detail::from_string(input, fkyaml::detail::type_tag {}), fkyaml::exception); + } + + SECTION("unsigned int type tests") + { + std::string input("3154357464"); + 
REQUIRE(fkyaml::detail::from_string(input, fkyaml::detail::type_tag {}) == 3154357464u); + + input = "999999999999999999999999"; + REQUIRE_THROWS_AS( + fkyaml::detail::from_string(input, fkyaml::detail::type_tag {}), fkyaml::exception); + } + + SECTION("long type tests") + { + std::string input("-1154357464"); + REQUIRE(fkyaml::detail::from_string(input, fkyaml::detail::type_tag {}) == -1154357464l); + + input = "9413456789012123456"; + REQUIRE_THROWS_AS(fkyaml::detail::from_string(input, fkyaml::detail::type_tag {}), fkyaml::exception); + } + + SECTION("unsigned long type tests") + { + std::string input("317464"); + REQUIRE(fkyaml::detail::from_string(input, fkyaml::detail::type_tag {}) == 317464ul); + + input = "999999999999999999999999"; + REQUIRE_THROWS_AS( + fkyaml::detail::from_string(input, fkyaml::detail::type_tag {}), fkyaml::exception); + } + + SECTION("long long type tests") + { + std::string input("-1154357464"); + REQUIRE(fkyaml::detail::from_string(input, fkyaml::detail::type_tag {}) == -1154357464ll); + + input = "18413456789012123456"; + REQUIRE_THROWS_AS( + fkyaml::detail::from_string(input, fkyaml::detail::type_tag {}), fkyaml::exception); + } + + SECTION("unsigned long long type tests") + { + std::string input("3154357464"); + REQUIRE(fkyaml::detail::from_string(input, fkyaml::detail::type_tag {}) == 3154357464ull); + + input = "999999999999999999999999"; + REQUIRE_THROWS_AS( + fkyaml::detail::from_string(input, fkyaml::detail::type_tag {}), fkyaml::exception); + } +} + +TEST_CASE("FromStringTest_FloatTest", "[FromStringTest]") +{ + SECTION("positive infinity test") + { + auto input = GENERATE(std::string(".inf"), std::string(".Inf"), std::string(".INF")); + REQUIRE( + fkyaml::detail::from_string(input, fkyaml::detail::type_tag {}) == + std::numeric_limits::infinity()); + } + + SECTION("negative infinity test") + { + auto input = GENERATE(std::string("-.inf"), std::string("-.Inf"), std::string("-.INF")); + REQUIRE( + 
fkyaml::detail::from_string(input, fkyaml::detail::type_tag {}) == + -1 * std::numeric_limits::infinity()); + } + + SECTION("NaN test") + { + auto input = GENERATE(std::string(".nan"), std::string(".NaN"), std::string(".NAN")); + float ret = 0.0f; + REQUIRE_NOTHROW(ret = fkyaml::detail::from_string(input, fkyaml::detail::type_tag {})); + REQUIRE(std::isnan(ret)); + } + + SECTION("value conversion tests") + { + std::string input("3.14"); + REQUIRE(std::abs(fkyaml::detail::from_string(input, fkyaml::detail::type_tag {}) - 3.14f) < FLT_EPSILON); + + input = "3.40282347e+39"; + REQUIRE_THROWS_AS(fkyaml::detail::from_string(input, fkyaml::detail::type_tag {}), fkyaml::exception); + } +} + +TEST_CASE("FromStringTest_DoubleTest", "[FromStringTest]") +{ + SECTION("positive infinity test") + { + auto input = GENERATE(std::string(".inf"), std::string(".Inf"), std::string(".INF")); + REQUIRE( + fkyaml::detail::from_string(input, fkyaml::detail::type_tag {}) == + std::numeric_limits::infinity()); + } + + SECTION("negative infinity test") + { + auto input = GENERATE(std::string("-.inf"), std::string("-.Inf"), std::string("-.INF")); + REQUIRE( + fkyaml::detail::from_string(input, fkyaml::detail::type_tag {}) == + -1 * std::numeric_limits::infinity()); + } + + SECTION("NaN test") + { + auto input = GENERATE(std::string(".nan"), std::string(".NaN"), std::string(".NAN")); + double ret = 0.0; + REQUIRE_NOTHROW(ret = fkyaml::detail::from_string(input, fkyaml::detail::type_tag {})); + REQUIRE(std::isnan(ret)); + } + + SECTION("value conversion tests") + { + std::string input("3.14"); + REQUIRE(std::abs(fkyaml::detail::from_string(input, fkyaml::detail::type_tag {}) - 3.14) < DBL_EPSILON); + + input = "1.7976931348623157E+309"; + REQUIRE_THROWS_AS(fkyaml::detail::from_string(input, fkyaml::detail::type_tag {}), fkyaml::exception); + } +} diff --git a/test/unit_test/test_lexical_analyzer_class.cpp b/test/unit_test/test_lexical_analyzer_class.cpp index 8c9d744c..addebf88 100644 --- 
a/test/unit_test/test_lexical_analyzer_class.cpp +++ b/test/unit_test/test_lexical_analyzer_class.cpp @@ -517,7 +517,7 @@ TEST_CASE("LexicalAnalyzerClassTest_ScanReservedIndicatorTokenTest", "[LexicalAn REQUIRE_THROWS_AS(lexer.get_next_token(), fkyaml::exception); } -TEST_CASE("LexicalAnalyzerClassTest_ScanKeyBooleanvalue_pair_tTokenTest", "[LexicalAnalyzerClassTest]") +TEST_CASE("LexicalAnalyzerClassTest_ScanKeyBooleanValuePairTokenTest", "[LexicalAnalyzerClassTest]") { pchar_lexer_t lexer(fkyaml::detail::input_adapter("test: true")); fkyaml::detail::lexical_token_t token; @@ -539,7 +539,7 @@ TEST_CASE("LexicalAnalyzerClassTest_ScanKeyBooleanvalue_pair_tTokenTest", "[Lexi REQUIRE(token == fkyaml::detail::lexical_token_t::END_OF_BUFFER); } -TEST_CASE("LexicalAnalyzerClassTest_ScanKeyIntegervalue_pair_tTokenTest", "[LexicalAnalyzerClassTest]") +TEST_CASE("LexicalAnalyzerClassTest_ScanKeyIntegerValuePairTokenTest", "[LexicalAnalyzerClassTest]") { pchar_lexer_t lexer(fkyaml::detail::input_adapter("test: -5784")); fkyaml::detail::lexical_token_t token; @@ -561,7 +561,7 @@ TEST_CASE("LexicalAnalyzerClassTest_ScanKeyIntegervalue_pair_tTokenTest", "[Lexi REQUIRE(token == fkyaml::detail::lexical_token_t::END_OF_BUFFER); } -TEST_CASE("LexicalAnalyzerClassTest_ScanKeyFloatNumbervalue_pair_tTokenTest", "[LexicalAnalyzerClassTest]") +TEST_CASE("LexicalAnalyzerClassTest_ScanKeyFloatNumberValuePairTokenTest", "[LexicalAnalyzerClassTest]") { pchar_lexer_t lexer(fkyaml::detail::input_adapter("test: -5.58e-3")); fkyaml::detail::lexical_token_t token; @@ -583,7 +583,7 @@ TEST_CASE("LexicalAnalyzerClassTest_ScanKeyFloatNumbervalue_pair_tTokenTest", "[ REQUIRE(token == fkyaml::detail::lexical_token_t::END_OF_BUFFER); } -TEST_CASE("LexicalAnalyzerClassTest_ScanKeyStringvalue_pair_tTokenTest", "[LexicalAnalyzerClassTest]") +TEST_CASE("LexicalAnalyzerClassTest_ScanKeyStringValuePairTokenTest", "[LexicalAnalyzerClassTest]") { pchar_lexer_t lexer(fkyaml::detail::input_adapter("test: \"some 
value\"")); fkyaml::detail::lexical_token_t token;