Skip to content

Commit

Permalink
Completely rethink the relationship among token_parser, token_spec_t,…
Browse files Browse the repository at this point in the history
… and

parser_interface.  token_spec is now a variable template that generates a
parser_interface wrapping a token_parser, which is parameterized on the
token_spec_t.  This way, a single token_spec can be used to specify both how
to lex and how to parse.

See #202.
  • Loading branch information
tzlaine committed Nov 9, 2024
1 parent ba2d757 commit c624681
Show file tree
Hide file tree
Showing 7 changed files with 250 additions and 301 deletions.
66 changes: 17 additions & 49 deletions include/boost/parser/lexer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
"In order to work, the Boost.Parser lexer requires C++20 and CTRE's ctre-unicode.hpp single-header file in the #include path. CTRE can be found at https://github.com/hanickadot/compile-time-regular-expressions . The required header is at https://raw.githubusercontent.com/hanickadot/compile-time-regular-expressions/refs/heads/main/single-header/ctre-unicode.hpp ."
#endif

#include <boost/parser/parser_fwd.hpp>
#include <boost/parser/concepts.hpp>
#include <boost/parser/detail/debug_assert.hpp>
#include <boost/parser/detail/hl.hpp>
Expand Down Expand Up @@ -46,8 +47,6 @@ std::ostream & operator<<(std::ostream & os, std::array<T, N> const & arr)

namespace boost { namespace parser {

struct none;

namespace detail {
enum class token_kind { no_value, string_view, long_long, long_double };

Expand Down Expand Up @@ -281,7 +280,7 @@ namespace boost { namespace parser {
constexpr auto base = TokenSpec::base;
if constexpr (TokenSpec::is_character_token) {
return parse_spec{token_parsed_type::character, base};
} else if constexpr (std::is_same_v<value_t, none>) {
} else if constexpr (std::is_same_v<value_t, string_view_tag>) {
return parse_spec{token_parsed_type::string_view, base};
} else if constexpr (std::is_same_v<value_t, bool>) {
return parse_spec{token_parsed_type::bool_, base};
Expand Down Expand Up @@ -321,11 +320,15 @@ namespace boost { namespace parser {
return parse_spec{token_parsed_type::long_double, base};
} else {
static_assert(
!std::is_same_v<value_t, value_t>,
!std::is_same_v<TokenSpec, TokenSpec>,
"The only valid types for the 'Value' template parameter "
"to 'lexer_token_spec' are 'none', integral types, and "
"floating-point types.");
"to 'token_spec' are 'string_view_tag', integral types, "
"and floating-point types.");
}
#if defined(__cpp_lib_unreachable)
std::unreachable();
return parse_spec{token_parsed_type::string_view, base};
#endif
}

template<char Ch, auto... Chs>
Expand Down Expand Up @@ -417,11 +420,7 @@ namespace boost { namespace parser {
}

/** TODO */
template<
ctll::fixed_string Regex,
auto ID,
typename ValueType = none,
int Base = 10>
template<ctll::fixed_string Regex, auto ID, typename ValueType, int Base>
struct token_spec_t
{
using id_type = decltype(ID);
Expand All @@ -438,26 +437,8 @@ namespace boost { namespace parser {
static constexpr id_type id = ID;
static constexpr int base = Base < 0 ? 10 : Base;
static constexpr bool is_character_token = Base < 0;

/** TODO */
template<typename ID2>
constexpr auto operator()(ID2 id) const noexcept;

/** TODO */
template<typename ID2, typename Value>
constexpr auto operator()(ID2 id, Value value) const noexcept;

// implementations in token_parser.hpp
};

/** TODO */
template<
ctll::fixed_string Regex,
auto ID,
typename ValueType = none,
int Base = 10>
constexpr auto token_spec = token_spec_t<Regex, ID, ValueType, Base>{};

// TODO: Document that this takes a pack of char -- and nothing else. Also
// note that for anything more complicated, including a short UTF-8 sequence
// that encodes a code point, you must use the token_spec form.
Expand Down Expand Up @@ -489,33 +470,20 @@ namespace boost { namespace parser {
static constexpr auto ids() { return IDs.as_array(); }
static constexpr auto specs() { return Specs.as_array(); }

// implementation in token_parser.hpp
template<
ctll::fixed_string RegexStr2,
auto ID2,
typename ValueType,
int Base>
auto operator|(token_spec_t<RegexStr2, ID2, ValueType, Base> rhs) const
{
static_assert(
std::same_as<ID, decltype(ID2)>,
"All id_types must be the same for all token_specs.");
constexpr auto new_regex =
detail::wrap_escape_concat<regex_str, RegexStr2>();
constexpr auto new_ids = IDs.template append<(int)ID2>();
constexpr auto new_specs =
Specs
.template append<detail::parse_spec_for<decltype(rhs)>()>();
return lexer_t<
CharType,
ID,
WsStr,
new_regex,
new_ids,
new_specs>{};
}
constexpr auto operator|(
parser_interface<token_parser<
token_spec_t<RegexStr2, ID2, ValueType, Base>>> const & rhs)
const;

template<auto Ch, auto... Chs>
auto operator|(detail::token_chars_spec<Ch, Chs...> rhs) const
constexpr auto
operator|(detail::token_chars_spec<Ch, Chs...> const & rhs) const
{
constexpr auto new_regex =
detail::wrap_escape_concat<regex_str, Ch, Chs...>();
Expand Down
15 changes: 6 additions & 9 deletions include/boost/parser/parser_fwd.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -154,10 +154,8 @@ namespace boost { namespace parser {
struct upper_case_chars
{};

struct any_token
struct any_token_value
{
bool matches_id(int) const { return true; }

template<typename T>
bool matches_value(T) const
{
Expand Down Expand Up @@ -445,15 +443,14 @@ namespace boost { namespace parser {
template<typename T>
struct float_parser;

/** A tag type used to stand in for any specialization of
`boost::parser::token<>`. */
struct token_tag
/** A tag type used to represent a value type that is any specialization
of `std::basic_string_view`. Which specialization is used depends on
the input. */
struct string_view_tag
{};

/** TODO */
template<
typename AttributeType = token_tag,
typename Expected = detail::any_token>
template<typename TokenSpec, typename Expected = detail::nope>
struct token_parser;

/** Applies at most one of the parsers in `OrParser`. If `switch_value_`
Expand Down
167 changes: 65 additions & 102 deletions include/boost/parser/token_parser.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,7 @@ namespace boost { namespace parser {
template<typename AttributeType, typename CharType>
std::optional<AttributeType> token_as(token<CharType> tok)
{
if constexpr (std::same_as<AttributeType, nope>) {
return nope{};
} else if constexpr (std::same_as<
AttributeType,
std::basic_string_view<CharType>>) {
if constexpr (std::same_as<AttributeType, string_view_tag>) {
if (tok.has_string_view())
return tok.get_string_view();
return std::nullopt;
Expand All @@ -43,32 +39,11 @@ namespace boost { namespace parser {
}
}

struct token_with_id
{
explicit token_with_id(int id) : id_(id) {}

bool matches(int id) const { return id == id_; }

template<typename T>
bool matches_value(T) const
{
return true;
}

int id_;
};

template<typename T>
struct token_with_id_and_value
struct token_with_value
{
explicit token_with_id_and_value(int id, T value) :
id_(id), value_(value)
{}

bool matches(int id) const { return id == id_; }
bool matches_value(T value) const { return value == value_; }

int id_;
explicit token_with_value(T value) : value_(std::move(value)) {}
bool matches(T const & value) const { return value == value_; }
T value_;
};
}
Expand All @@ -78,18 +53,15 @@ namespace boost { namespace parser {
// TODO: Needs a printer.
// TODO: Constrain the AttributeType to something that detail::token_as()
// can handle.
template<typename AttributeType, typename Expected>
template<typename TokenSpec, typename Expected>
struct token_parser
{
using attribute_type = std::conditional_t<
std::is_same_v<AttributeType, void>,
detail::nope,
AttributeType>;
using token_spec = TokenSpec;

using expected_value_type = std::conditional_t<
std::is_same_v<attribute_type, token_tag>,
detail::nope,
attribute_type>;
using attribute_type = std::conditional_t<
std::is_same_v<typename token_spec::value_type, none>,
string_view_tag,
typename token_spec::value_type>;

constexpr token_parser() = default;
constexpr token_parser(Expected expected) : expected_(expected) {}
Expand Down Expand Up @@ -142,97 +114,88 @@ namespace boost { namespace parser {
}

value_type const x = *first;
if (!expected_.matches_id(x.id())) {
if (x.id() != token_spec::id) {
success = false;
return;
}

if constexpr (std::same_as<AttributeType, token_tag>) {
detail::assign(retval, x);
} else {
constexpr bool use_expected = !std::same_as<Expected, detail::nope>;
if (use_expected || detail::gen_attrs(flags)) {
auto opt_attr = detail::token_as<attribute_type>(x);
if (!opt_attr || !expected_.matches_value(*opt_attr)) {
success = false;
return;
if constexpr (use_expected) {
if (!opt_attr || !expected_.matches_value(*opt_attr)) {
success = false;
return;
}
}
detail::assign(retval, *opt_attr);
if (detail::gen_attrs(flags))
detail::assign(retval, *opt_attr);
}

++first;
}

// TODO: Constrain all ID params below (incl. the ones from
// token_spec_t) only to accept type convertible to int.

/** TODO */
template<typename ID>
constexpr auto operator()(ID id) const noexcept
{
BOOST_PARSER_ASSERT(
(detail::is_nope_v<Expected> &&
"If you're seeing this, you tried to chain calls on tok, "
"tok_t, or one of your token_spec_t's, like 'tok(id1)(id2)'. "
"Quit it!'"));
return parser_interface(
token_parser<AttributeType, detail::token_with_id>(
detail::token_with_id((int)id)));
}

/** TODO */
template<typename ID>
constexpr auto
operator()(ID id, expected_value_type value) const noexcept
constexpr auto operator()(attribute_type value) const noexcept
{
BOOST_PARSER_ASSERT(
(detail::is_nope_v<Expected> &&
"If you're seeing this, you tried to chain calls on tok, "
"tok_t, or one of your token_spec_t's, like 'tok(id1)(id2)'. "
"Quit it!'"));
return parser_interface(
token_parser<
AttributeType,
detail::token_with_id_and_value<expected_value_type>>(
detail::token_with_id_and_value((int)id, value)));
"If you're seeing this, you tried to chain calls on one of "
"your token_spec's, like 'my_token_spec(id1)(id2)'. Quit "
"it!'"));
return parser_interface(token_parser<
TokenSpec,
detail::token_with_value<attribute_type>>(
detail::token_with_value(std::move(value))));
}

Expected expected_;
};

template<ctll::fixed_string Regex, auto ID, typename ValueType, int Base>
template<typename ID2>
constexpr auto
token_spec_t<Regex, ID, ValueType, Base>::operator()(ID2 id) const noexcept
{
using attribute_type = std::
conditional_t<std::same_as<ValueType, none>, token_tag, ValueType>;
return parser_interface(
token_parser<attribute_type, detail::token_with_id>(
detail::token_with_id((int)id)));
}
#endif

/** A variable template whose value is a `parser_interface` wrapping a
default-constructed `token_parser<token_spec_t<Regex, ID, ValueType,
Base>>`.  `ValueType` defaults to `string_view_tag` and `Base` defaults to
`10`.  NOTE(review): the accompanying commit message states that a single
`token_spec` is meant to specify both how to lex and how to parse --
confirm against the lexer's `operator|` overload that accepts this
`parser_interface` type. */
template<
ctll::fixed_string Regex,
auto ID,
typename ValueType = string_view_tag,
int Base = 10>
constexpr parser_interface token_spec{
token_parser<token_spec_t<Regex, ID, ValueType, Base>>()};

#ifndef BOOST_PARSER_DOXYGEN

template<ctll::fixed_string Regex, auto ID, typename ValueType, int Base>
template<typename ID2, typename Value>
constexpr auto token_spec_t<Regex, ID, ValueType, Base>::operator()(
ID2 id, Value value) const noexcept
template<
typename CharType,
typename ID,
ctll::fixed_string WsStr,
ctll::fixed_string RegexStr,
detail::nttp_array IDs,
detail::nttp_array Specs>
template<
ctll::fixed_string RegexStr2,
auto ID2,
typename ValueType,
int Base>
constexpr auto
lexer_t<CharType, ID, WsStr, RegexStr, IDs, Specs>::operator|(
parser_interface<token_parser<
token_spec_t<RegexStr2, ID2, ValueType, Base>>> const &) const
{
using attribute_type = std::
conditional_t<std::same_as<ValueType, none>, token_tag, ValueType>;
return parser_interface(token_parser<
attribute_type,
detail::token_with_id_and_value<Value>>(
detail::token_with_id_and_value((int)id, value)));
static_assert(
std::same_as<ID, decltype(ID2)>,
"All id_types must be the same for all token_specs.");
constexpr auto new_regex =
detail::wrap_escape_concat<regex_str, RegexStr2>();
constexpr auto new_ids = IDs.template append<(int)ID2>();
constexpr auto new_specs = Specs.template append<detail::parse_spec_for<
token_spec_t<RegexStr2, ID2, ValueType, Base>>()>();
return lexer_t<CharType, ID, WsStr, new_regex, new_ids, new_specs>{};
}


#endif

/** TODO */
constexpr parser_interface<token_parser<>> tok;

/** TODO */
template<typename AttributeType>
constexpr parser_interface<token_parser<AttributeType>> tok_t;

}}

#endif
Expand Down
Loading

0 comments on commit c624681

Please sign in to comment.