Skip to content

Commit

Permalink
Change token<> to include the position of the start of the token in the
Browse files Browse the repository at this point in the history
underlying sequence, and change the way that the error handler is invoked, so
that it detects token iterators, and passes iterators into the underlying
range to the error handler, instead of the token iterators.

See #202.
  • Loading branch information
tzlaine committed Nov 10, 2024
1 parent 0935656 commit c6331b2
Show file tree
Hide file tree
Showing 5 changed files with 682 additions and 544 deletions.
10 changes: 10 additions & 0 deletions include/boost/parser/config.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,12 @@
also defined. */
# define BOOST_PARSER_TRACE_TO_VS_OUTPUT

/** When lexing is enabled, each token contains its position within the
underlying range. To save a bit of space, an `unsigned int` is used for
this. If you parse input sequences longer than 2^32-1 characters, define
`BOOST_PARSER_TOKEN_POSITION_TYPE` to be a larger integral type. */
# define BOOST_PARSER_TOKEN_POSITION_TYPE unsigned int

#else

# ifdef BOOST_PARSER_NO_RUNTIME_ASSERTIONS
Expand Down Expand Up @@ -103,6 +109,10 @@
# define BOOST_PARSER_MAX_AGGREGATE_SIZE 25
#endif

// Integral type used to store each token's position within the underlying
// range.  Defaults to `unsigned int`; define the macro to a larger integral
// type before this point to override the default.
#if !defined(BOOST_PARSER_TOKEN_POSITION_TYPE)
# define BOOST_PARSER_TOKEN_POSITION_TYPE unsigned int
#endif

// VS2019 and VS2017 need conditional constexpr in some places, even in C++17 mode.
#if !defined(_MSC_VER) || 1930 <= _MSC_VER
# define BOOST_PARSER_CONSTEXPR constexpr
Expand Down
108 changes: 71 additions & 37 deletions include/boost/parser/lexer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -151,27 +151,47 @@ namespace boost { namespace parser {
{
using char_type = CharType;
using string_view = std::basic_string_view<CharType>;
using position_type = BOOST_PARSER_TOKEN_POSITION_TYPE;

constexpr token() :
value_(0ll), id_(), kind_(detail::token_kind::string_view)
value_(0ll),
underlying_position_(),
id_(),
kind_(detail::token_kind::string_view)
{}
constexpr token(int id, string_view value) :
value_(0ll), id_(id), kind_(detail::token_kind::string_view)
/** Constructs a token with ID `id` that holds the `string_view` `value`.
    `underlying_position` is the position of the start of the token in the
    underlying sequence; it is stored and later returned by
    `underlying_position()`. */
constexpr token(
    int id, position_type underlying_position, string_view value) :
    value_(0ll),
    // Store the caller-supplied position.  Value-initializing this member
    // here would make every string_view token report position 0.
    underlying_position_(underlying_position),
    id_(id),
    kind_(detail::token_kind::string_view)
{
    value_.sv_ = value;
}
constexpr token(int id, long long value) :
value_(0ll), id_(id), kind_(detail::token_kind::long_long)
/** Constructs a token with ID `id` that holds the `long long` `value`.
    `underlying_position` is the position of the start of the token in the
    underlying sequence. */
constexpr token(
int id, position_type underlying_position, long long value) :
value_(0ll),
underlying_position_(underlying_position),
id_(id),
kind_(detail::token_kind::long_long)
{
// Store the payload in the `long long` member of the value union.
value_.ll_ = value;
}
constexpr token(int id, long double value) :
value_(0ll), id_(id), kind_(detail::token_kind::long_double)
/** Constructs a token with ID `id` that holds the `long double` `value`.
    `underlying_position` is the position of the start of the token in the
    underlying sequence. */
constexpr token(
int id, position_type underlying_position, long double value) :
value_(0ll),
underlying_position_(underlying_position),
id_(id),
kind_(detail::token_kind::long_double)
{
// Store the payload in the `long double` member of the value union.
value_.d_ = value;
}

/** Returns the ID this token was constructed with. */
constexpr int id() const { return id_; }
/** Returns the stored position of the start of this token in the
    underlying sequence. */
constexpr position_type underlying_position() const
{
return underlying_position_;
}

constexpr bool has_string_view() const
{
Expand Down Expand Up @@ -238,6 +258,7 @@ namespace boost { namespace parser {
long double d_;
string_view sv_;
} value_;
position_type underlying_position_ = 0;
// TODO: Document the 22-bit size limitation on id_ (values must be
// positive).
int id_ : 24;
Expand Down Expand Up @@ -539,8 +560,10 @@ namespace boost { namespace parser {
};

template<parse_spec Spec, typename CharType>
token<CharType>
make_token(int id, std::basic_string_view<CharType> ctre_token)
token<CharType> make_token(
int id,
std::basic_string_view<CharType> ctre_token,
BOOST_PARSER_TOKEN_POSITION_TYPE underlying_position)
{
auto f = ctre_token.data();
auto const l = f + ctre_token.size();
Expand All @@ -553,16 +576,20 @@ namespace boost { namespace parser {

switch (Spec.type) {
case token_parsed_type::character:
return {character_id, (long long)ctre_token[0]};
return {
character_id,
underlying_position,
(long long)ctre_token[0]};

case token_parsed_type::string_view: return {id, ctre_token};
case token_parsed_type::string_view:
return {id, underlying_position, ctre_token};

case token_parsed_type::bool_:
using namespace std::literals;
if (std::ranges::equal(ctre_token, "true"sv)) {
return {id, 1ll};
return {id, underlying_position, 1ll};
} else if (std::ranges::equal(ctre_token, "false"sv)) {
return {id, 0ll};
return {id, underlying_position, 0ll};
} else {
// TODO: report error.
}
Expand All @@ -573,111 +600,111 @@ namespace boost { namespace parser {
type_wrapper<decltype(value)>{},
Spec.radix,
numeric::parse_int<true, Spec.radix, 1, -1>(f, l, value));
return {id, (long long)value};
return {id, underlying_position, (long long)value};
}
case token_parsed_type::unsigned_char: {
unsigned char value;
report_error(
type_wrapper<decltype(value)>{},
Spec.radix,
numeric::parse_int<false, Spec.radix, 1, -1>(f, l, value));
return {id, (long long)value};
return {id, underlying_position, (long long)value};
}
case token_parsed_type::short_: {
short value;
report_error(
type_wrapper<decltype(value)>{},
Spec.radix,
numeric::parse_int<true, Spec.radix, 1, -1>(f, l, value));
return {id, (long long)value};
return {id, underlying_position, (long long)value};
}
case token_parsed_type::unsigned_short: {
unsigned short value;
report_error(
type_wrapper<decltype(value)>{},
Spec.radix,
numeric::parse_int<false, Spec.radix, 1, -1>(f, l, value));
return {id, (long long)value};
return {id, underlying_position, (long long)value};
}
case token_parsed_type::int_: {
int value;
report_error(
type_wrapper<decltype(value)>{},
Spec.radix,
numeric::parse_int<true, Spec.radix, 1, -1>(f, l, value));
return {id, (long long)value};
return {id, underlying_position, (long long)value};
}
case token_parsed_type::unsigned_int: {
unsigned int value;
report_error(
type_wrapper<decltype(value)>{},
Spec.radix,
numeric::parse_int<false, Spec.radix, 1, -1>(f, l, value));
return {id, (long long)value};
return {id, underlying_position, (long long)value};
}
case token_parsed_type::long_: {
long value;
report_error(
type_wrapper<decltype(value)>{},
Spec.radix,
numeric::parse_int<true, Spec.radix, 1, -1>(f, l, value));
return {id, (long long)value};
return {id, underlying_position, (long long)value};
}
case token_parsed_type::unsigned_long: {
unsigned long value;
report_error(
type_wrapper<decltype(value)>{},
Spec.radix,
numeric::parse_int<false, Spec.radix, 1, -1>(f, l, value));
return {id, (long long)value};
return {id, underlying_position, (long long)value};
}
case token_parsed_type::long_long: {
long long value;
report_error(
type_wrapper<decltype(value)>{},
Spec.radix,
numeric::parse_int<true, Spec.radix, 1, -1>(f, l, value));
return {id, (long long)value};
return {id, underlying_position, (long long)value};
}
case token_parsed_type::unsigned_long_long: {
unsigned long long value;
report_error(
type_wrapper<decltype(value)>{},
Spec.radix,
numeric::parse_int<false, Spec.radix, 1, -1>(f, l, value));
return {id, (long long)value};
return {id, underlying_position, (long long)value};
}
case token_parsed_type::wchar_t_: {
unsigned int value;
report_error(
type_wrapper<wchar_t>{},
Spec.radix,
numeric::parse_int<false, Spec.radix, 1, -1>(f, l, value));
return {id, (long long)value};
return {id, underlying_position, (long long)value};
}
case token_parsed_type::char8_t_: {
unsigned int value;
report_error(
type_wrapper<char8_t>{},
Spec.radix,
numeric::parse_int<false, Spec.radix, 1, -1>(f, l, value));
return {id, (long long)value};
return {id, underlying_position, (long long)value};
}
case token_parsed_type::char16_t_: {
unsigned int value;
report_error(
type_wrapper<char16_t>{},
Spec.radix,
numeric::parse_int<false, Spec.radix, 1, -1>(f, l, value));
return {id, (long long)value};
return {id, underlying_position, (long long)value};
}
case token_parsed_type::char32_t_: {
unsigned int value;
report_error(
type_wrapper<char32_t>{},
Spec.radix,
numeric::parse_int<false, Spec.radix, 1, -1>(f, l, value));
return {id, (long long)value};
return {id, underlying_position, (long long)value};
}

case token_parsed_type::float_: {
Expand All @@ -686,30 +713,30 @@ namespace boost { namespace parser {
type_wrapper<decltype(value)>{},
0,
numeric::parse_real(f, l, value));
return {id, (long double)value};
return {id, underlying_position, (long double)value};
}
case token_parsed_type::double_: {
double value;
report_error(
type_wrapper<decltype(value)>{},
0,
numeric::parse_real(f, l, value));
return {id, (long double)value};
return {id, underlying_position, (long double)value};
}
case token_parsed_type::long_double: {
long double value;
report_error(
type_wrapper<decltype(value)>{},
0,
numeric::parse_real(f, l, value));
return {id, value};
return {id, underlying_position, value};
}
case token_parsed_type::ws:
default:
#if defined(__cpp_lib_unreachable)
std::unreachable();
#endif
return {id, 0ll};
return {id, underlying_position, 0ll};
}
}
}
Expand All @@ -721,7 +748,7 @@ namespace boost { namespace parser {

/** TODO */
template<
std::ranges::forward_range V,
std::ranges::contiguous_range V,
typename Lexer,
typename TokenCache = std::vector<typename Lexer::token_type>>
requires std::ranges::view<V>
Expand Down Expand Up @@ -754,7 +781,7 @@ namespace boost { namespace parser {
lexer_(std::move(lexer)),
tokens_(owned_cache_)
{
latest_ = base_.begin();
latest_ = std::ranges::begin(base_);
}
constexpr explicit tokens_view(
V base,
Expand All @@ -764,7 +791,7 @@ namespace boost { namespace parser {
lexer_(std::move(lexer)),
tokens_(external_cache.get())
{
latest_ = base_.begin();
latest_ = std::ranges::begin(base_);
}

constexpr V base() const &
Expand Down Expand Up @@ -830,7 +857,7 @@ namespace boost { namespace parser {
parent_->tokens_.reserve(new_size);

auto r = std::ranges::subrange(
parent_->latest_, parent_->base_.end());
parent_->latest_, std::ranges::end(parent_->base_));
auto ctre_range = Lexer::regex_range(r);
auto ctre_first = ctre_range.begin();
auto const ctre_last = ctre_range.end();
Expand Down Expand Up @@ -864,7 +891,11 @@ namespace boost { namespace parser {
constexpr detail::parse_spec parse_spec =
parent_->lexer_.specs()[i.value];
parent_->tokens_.push_back(
detail::make_token<parse_spec>(id, sv));
detail::make_token<parse_spec>(
id,
sv,
ctre_first.current -
ctre_first.orig_begin));
return sv;
} else {
return state;
Expand Down Expand Up @@ -902,6 +933,9 @@ namespace boost { namespace parser {
BOOST_PARSER_DEBUG_ASSERT(parent_ == rhs.parent_);
return token_offset_ == rhs.token_offset_;
}

/** Returns an iterator to the beginning of the underlying range
    (`parent_->base_`), as opposed to a token iterator. */
auto range_begin() const
{
    // The `return` is required: without it the `auto` return type deduces
    // to `void` and the iterator is discarded.
    return std::ranges::begin(parent_->base_);
}
/** Returns the end iterator/sentinel of the underlying range
    (`parent_->base_`), as opposed to a token sentinel. */
auto range_end() const
{
    // The `return` is required: without it the `auto` return type deduces
    // to `void` and the result is discarded.
    return std::ranges::end(parent_->base_);
}
};

template<bool Const>
Expand Down Expand Up @@ -936,7 +970,7 @@ namespace boost { namespace parser {
if (x.token_offset_ != x.parent_->tokens_.size())
return false;
auto r = std::ranges::subrange(
x.parent_->latest_, x.parent_->base_.end());
x.parent_->latest_, std::ranges::end(x.parent_->base_));
auto ctre_range = Lexer::regex_range(r);
return !ctre_range.begin().current_match;
}
Expand Down
Loading

0 comments on commit c6331b2

Please sign in to comment.