From 0c8154930b6874008e89fe10ec2f6e5db41edcbb Mon Sep 17 00:00:00 2001 From: Zach Laine Date: Fri, 8 Nov 2024 23:15:34 -0600 Subject: [PATCH] Initial, partial sketch of token_parser. See #202. --- include/boost/parser/lexer.hpp | 62 ++++++----- include/boost/parser/parser.hpp | 2 + include/boost/parser/parser_fwd.hpp | 4 + include/boost/parser/token_parser.hpp | 142 ++++++++++++++++++++++++++ test/CMakeLists.txt | 5 +- test/compile_include_lexer_parser.cpp | 10 ++ test/compile_include_parser_lexer.cpp | 9 ++ test/lexer.cpp | 3 - test/lexer_adobe_files.cpp | 4 - test/lexer_and_parser.cpp | 62 +++++++++++ 10 files changed, 269 insertions(+), 34 deletions(-) create mode 100644 include/boost/parser/token_parser.hpp create mode 100644 test/compile_include_lexer_parser.cpp create mode 100644 test/compile_include_parser_lexer.cpp create mode 100644 test/lexer_and_parser.cpp diff --git a/include/boost/parser/lexer.hpp b/include/boost/parser/lexer.hpp index f85ccfc6..8bd8a6b1 100644 --- a/include/boost/parser/lexer.hpp +++ b/include/boost/parser/lexer.hpp @@ -248,6 +248,22 @@ namespace boost { namespace parser { detail::token_kind kind_ : 8; }; +#if BOOST_PARSER_DOXYGEN + + /** TODO */ + template + constexpr bool is_token_v = detail::foo; + +#else + + template + constexpr bool is_token_v = false; + + template + constexpr bool is_token_v> = true; + +#endif + #if defined(BOOST_PARSER_TESTING) template std::ostream & operator<<(std::ostream & os, token const & token) @@ -279,14 +295,6 @@ namespace boost { namespace parser { } #endif - // TODO: Actually, this should go in parse.hpp. 
- /** TODO */ - template - struct token_parser - { - // TODO - }; - namespace detail { template constexpr parse_spec parse_spec_for() @@ -566,7 +574,7 @@ namespace boost { namespace parser { namespace detail { template - struct wrapper + struct type_wrapper { using type = T; }; @@ -603,7 +611,7 @@ namespace boost { namespace parser { case token_parsed_type::signed_char: { signed char value; report_error( - wrapper{}, + type_wrapper{}, Spec.radix, numeric::parse_int(f, l, value)); return {id, (long long)value}; @@ -611,7 +619,7 @@ namespace boost { namespace parser { case token_parsed_type::unsigned_char: { unsigned char value; report_error( - wrapper{}, + type_wrapper{}, Spec.radix, numeric::parse_int(f, l, value)); return {id, (long long)value}; @@ -619,7 +627,7 @@ namespace boost { namespace parser { case token_parsed_type::short_: { short value; report_error( - wrapper{}, + type_wrapper{}, Spec.radix, numeric::parse_int(f, l, value)); return {id, (long long)value}; @@ -627,7 +635,7 @@ namespace boost { namespace parser { case token_parsed_type::unsigned_short: { unsigned short value; report_error( - wrapper{}, + type_wrapper{}, Spec.radix, numeric::parse_int(f, l, value)); return {id, (long long)value}; @@ -635,7 +643,7 @@ namespace boost { namespace parser { case token_parsed_type::int_: { int value; report_error( - wrapper{}, + type_wrapper{}, Spec.radix, numeric::parse_int(f, l, value)); return {id, (long long)value}; @@ -643,7 +651,7 @@ namespace boost { namespace parser { case token_parsed_type::unsigned_int: { unsigned int value; report_error( - wrapper{}, + type_wrapper{}, Spec.radix, numeric::parse_int(f, l, value)); return {id, (long long)value}; @@ -651,7 +659,7 @@ namespace boost { namespace parser { case token_parsed_type::long_: { long value; report_error( - wrapper{}, + type_wrapper{}, Spec.radix, numeric::parse_int(f, l, value)); return {id, (long long)value}; @@ -659,7 +667,7 @@ namespace boost { namespace parser { case 
token_parsed_type::unsigned_long: { unsigned long value; report_error( - wrapper{}, + type_wrapper{}, Spec.radix, numeric::parse_int(f, l, value)); return {id, (long long)value}; @@ -667,7 +675,7 @@ namespace boost { namespace parser { case token_parsed_type::long_long: { long long value; report_error( - wrapper{}, + type_wrapper{}, Spec.radix, numeric::parse_int(f, l, value)); return {id, (long long)value}; @@ -675,7 +683,7 @@ namespace boost { namespace parser { case token_parsed_type::unsigned_long_long: { unsigned long long value; report_error( - wrapper{}, + type_wrapper{}, Spec.radix, numeric::parse_int(f, l, value)); return {id, (long long)value}; @@ -683,7 +691,7 @@ namespace boost { namespace parser { case token_parsed_type::wchar_t_: { unsigned int value; report_error( - wrapper{}, + type_wrapper{}, Spec.radix, numeric::parse_int(f, l, value)); return {id, (long long)value}; @@ -691,7 +699,7 @@ namespace boost { namespace parser { case token_parsed_type::char8_t_: { unsigned int value; report_error( - wrapper{}, + type_wrapper{}, Spec.radix, numeric::parse_int(f, l, value)); return {id, (long long)value}; @@ -699,7 +707,7 @@ namespace boost { namespace parser { case token_parsed_type::char16_t_: { unsigned int value; report_error( - wrapper{}, + type_wrapper{}, Spec.radix, numeric::parse_int(f, l, value)); return {id, (long long)value}; @@ -707,7 +715,7 @@ namespace boost { namespace parser { case token_parsed_type::char32_t_: { unsigned int value; report_error( - wrapper{}, + type_wrapper{}, Spec.radix, numeric::parse_int(f, l, value)); return {id, (long long)value}; @@ -716,7 +724,7 @@ namespace boost { namespace parser { case token_parsed_type::float_: { float value; report_error( - wrapper{}, + type_wrapper{}, 0, numeric::parse_real(f, l, value)); return {id, (long double)value}; @@ -724,7 +732,7 @@ namespace boost { namespace parser { case token_parsed_type::double_: { double value; report_error( - wrapper{}, + type_wrapper{}, 0, 
numeric::parse_real(f, l, value)); return {id, (long double)value}; @@ -732,7 +740,7 @@ namespace boost { namespace parser { case token_parsed_type::long_double: { long double value; report_error( - wrapper{}, + type_wrapper{}, 0, numeric::parse_real(f, l, value)); return {id, value}; @@ -1014,4 +1022,6 @@ namespace boost { namespace parser { }} +#include + #endif diff --git a/include/boost/parser/parser.hpp b/include/boost/parser/parser.hpp index 2bb104d7..641f84e4 100644 --- a/include/boost/parser/parser.hpp +++ b/include/boost/parser/parser.hpp @@ -9421,4 +9421,6 @@ namespace boost { namespace parser { } }} +#include + #endif diff --git a/include/boost/parser/parser_fwd.hpp b/include/boost/parser/parser_fwd.hpp index 4f737373..ce1558d4 100644 --- a/include/boost/parser/parser_fwd.hpp +++ b/include/boost/parser/parser_fwd.hpp @@ -428,6 +428,10 @@ namespace boost { namespace parser { template struct float_parser; + /** TODO */ + template + struct token_parser; + /** Applies at most one of the parsers in `OrParser`. If `switch_value_` matches one or more of the values in the parsers in `OrParser`, the first such parser is applied, and the success or failure and attribute diff --git a/include/boost/parser/token_parser.hpp b/include/boost/parser/token_parser.hpp new file mode 100644 index 00000000..898057ec --- /dev/null +++ b/include/boost/parser/token_parser.hpp @@ -0,0 +1,142 @@ +// Copyright (C) 2024 T. Zachary Laine +// +// Distributed under the Boost Software License, Version 1.0. 
(See +// accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt) +#ifndef BOOST_PARSER_TOKEN_PARSER_HPP +#if defined(BOOST_PARSER_PARSER_HPP) && defined(BOOST_PARSER_LEXER_HPP) +#define BOOST_PARSER_TOKEN_PARSER_HPP + +#include +#include +#include +// TODO #include +// TODO #include +// TODO #include +// TODO #include +// TODO #include +// TODO #include +// TODO #include +// TODO #include +// TODO +// TODO #include +// TODO #include +// TODO #include +// TODO +// TODO #include +// TODO #include +// TODO #include + + +namespace boost { namespace parser { + + namespace detail { + template + std::optional token_as(token tok) + { + if constexpr (std::same_as) { + return nope{}; + } else if constexpr (std::same_as< + AttributeType, + std::basic_string_view>) { + if (tok.has_string_view()) + return tok.get_string_view(); + return std::nullopt; + } else if constexpr (std::is_floating_point_v) { + if (tok.has_long_double()) + return tok.get_long_double(); + return std::nullopt; + } else if constexpr (std::is_integral_v) { + if (tok.has_long_long()) + return AttributeType(tok.get_long_long()); + return std::nullopt; + } else { + static_assert( + !std::same_as, + "The only attribute types that can be pulled out of a " + "token are no-attribute, floating-point values, or " + "integral values (including character types)."); + } + } + } + +#ifndef BOOST_PARSER_DOXYGEN + + // TODO: Needs a printer. 
+ template + struct token_parser + { + using attribute_type = std::conditional_t< + std::is_same_v, + detail::nope, + AttributeType>; + + constexpr token_parser() {} + + template< + typename Iter, + typename Sentinel, + typename Context, + typename SkipParser> + auto call( + Iter & first, + Sentinel last, + Context const & context, + SkipParser const & skip, + detail::flags flags, + bool & success) const -> attribute_type + { + attribute_type retval; + call(first, last, context, skip, flags, success, retval); + return retval; + } + + template< + typename Iter, + typename Sentinel, + typename Context, + typename SkipParser, + typename Attribute> + void call( + Iter & first, + Sentinel last, + Context const & context, + SkipParser const & skip, + detail::flags flags, + bool & success, + Attribute & retval) const + { + using value_type = std::remove_cvref_t; + static_assert( + is_token_v, + "token_parser can only be used when parsing sequences of " + "tokens."); + + [[maybe_unused]] auto _ = detail::scoped_trace( + *this, first, last, context, flags, retval); + + if (first == last) { + success = false; + return; + } + value_type const x = *first; + // TODO: Test for equality with some expectation, if any. 
+ auto opt_attr = detail::token_as(x); + if (!opt_attr) { + success = false; + return; + } + detail::assign(retval, *opt_attr); + ++first; + } + }; + +#endif + + /** TODO */ + constexpr parser_interface> tok; + +}} + +#endif +#endif diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 3ec32c57..878c8c49 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -48,6 +48,8 @@ add_test(NAME parser_api COMMAND parser_api) add_executable( compile_tests + compile_include_lexer_parser.cpp + compile_include_parser_lexer.cpp compile_tests_main.cpp compile_attribute.cpp compile_seq_attribute.cpp @@ -56,7 +58,7 @@ add_executable( compile_all_t.cpp ) set_property(TARGET compile_tests PROPERTY CXX_STANDARD ${CXX_STD}) -target_link_libraries(compile_tests parser boost) +target_link_libraries(compile_tests parser boost ctre_single_header) macro(add_test_executable name) add_executable(${name} ${name}.cpp) @@ -102,6 +104,7 @@ add_test_executable(parser_or_permutations_2) if (CXX_STD GREATER_EQUAL 20) add_test_executable(lexer) add_test_executable(lexer_adobe_files) + add_test_executable(lexer_and_parser) endif() if (MSVC) diff --git a/test/compile_include_lexer_parser.cpp b/test/compile_include_lexer_parser.cpp new file mode 100644 index 00000000..11b8e9aa --- /dev/null +++ b/test/compile_include_lexer_parser.cpp @@ -0,0 +1,10 @@ +// Copyright (C) 2024 T. Zachary Laine +// +// Distributed under the Boost Software License, Version 1.0. (See +// accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt) +#include +#if BOOST_PARSER_USE_CONCEPTS +#include +#endif +#include diff --git a/test/compile_include_parser_lexer.cpp b/test/compile_include_parser_lexer.cpp new file mode 100644 index 00000000..343b1988 --- /dev/null +++ b/test/compile_include_parser_lexer.cpp @@ -0,0 +1,9 @@ +// Copyright (C) 2024 T. Zachary Laine +// +// Distributed under the Boost Software License, Version 1.0. 
(See +// accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt) +#include +#if BOOST_PARSER_USE_CONCEPTS +#include +#endif diff --git a/test/lexer.cpp b/test/lexer.cpp index 3b7d4376..8a2b445b 100644 --- a/test/lexer.cpp +++ b/test/lexer.cpp @@ -24,8 +24,6 @@ enum class my_tokens { ws, foo, bar, baz }; int main() { -#if BOOST_PARSER_USE_CONCEPTS - // formation of token_specs { auto const token_spec = bp::token_spec<"foo", 12>; @@ -493,7 +491,6 @@ int main() // CTRE_STRING_IS_UTF8 is defined, and no encoding otherwise. Also document // that char16_t is treated as UTF-16, but wchar_t and char32_t are *both* // treated as UTF-32, even on windows. -#endif return boost::report_errors(); } diff --git a/test/lexer_adobe_files.cpp b/test/lexer_adobe_files.cpp index c40f4ef0..c27cc201 100644 --- a/test/lexer_adobe_files.cpp +++ b/test/lexer_adobe_files.cpp @@ -38,8 +38,6 @@ enum class adobe_tokens { int main() { -#if BOOST_PARSER_USE_CONCEPTS - { // Document that maximum munch does not appear to apply -- putting "<==" // after "<|>|<=|>=" causes input "<==" to be tokenized as "<", "==". @@ -846,7 +844,5 @@ sheet image_size } } -#endif - return boost::report_errors(); } diff --git a/test/lexer_and_parser.cpp b/test/lexer_and_parser.cpp new file mode 100644 index 00000000..6ef10187 --- /dev/null +++ b/test/lexer_and_parser.cpp @@ -0,0 +1,62 @@ +/** + * Copyright (C) 2024 T. Zachary Laine + * + * Distributed under the Boost Software License, Version 1.0. (See + * accompanying file LICENSE_1_0.txt or copy at + * http://www.boost.org/LICENSE_1_0.txt) + */ + +#include +#include + +#include + + +namespace bp = boost::parser; + + +int main() +{ + // Minimal test; just instantiate the member functions, without involving + // the parse() API. 
+ { + bp::token tokens[1] = {}; + bp::token_parser p; + auto first = std::begin(tokens); + auto const last = std::end(tokens); + + bp::detail::nope globals; + bp::default_error_handler error_handler; + + // From parse_impl(). + bool success = true; + int trace_indent = 0; + bp::detail::symbol_table_tries_t symbol_table_tries; + bp::detail::pending_symbol_table_operations_t + pending_symbol_table_operations; + bp::detail::scoped_apply_pending_symbol_table_operations apply_pending( + pending_symbol_table_operations); + auto context = bp::detail::make_context( + first, + last, + success, + trace_indent, + error_handler, + globals, + symbol_table_tries, + pending_symbol_table_operations); + auto const flags = bp::detail::flags::gen_attrs; + + std::optional result = + p.call(first, last, context, bp::ws, flags, success); + (void)result; + } + + // TODO { + // TODO std::string str = "a"; + // TODO BOOST_TEST(parse(str, char_)); + // TODO BOOST_TEST(!parse(str, char_('b'))); + // TODO } + + return boost::report_errors(); +}