Skip to content

Commit

Permalink
Add a range adaptor for token_view; token_view -> tokens_view. Break
Browse files Browse the repository at this point in the history
detail::make_input_subrange() out into its own header for reuse.

See #202.
  • Loading branch information
tzlaine committed Nov 5, 2024
1 parent 64e8d4e commit 4ac5244
Show file tree
Hide file tree
Showing 4 changed files with 193 additions and 122 deletions.
118 changes: 118 additions & 0 deletions include/boost/parser/detail/make_input_subrange.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
// Copyright (C) 2024 T. Zachary Laine
//
// Distributed under the Boost Software License, Version 1.0. (See
// accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)
#ifndef BOOST_PARSER_DETAIL_MAKE_INPUT_SUBRANGE_HPP
#define BOOST_PARSER_DETAIL_MAKE_INPUT_SUBRANGE_HPP

#include <boost/parser/config.hpp>
#include <boost/parser/subrange.hpp>

#include <iterator>
#include <type_traits>
#include <utility>


namespace boost::parser::detail {

/** Trait that reports whether `T` is a specialization of
    `text::utf8_view`.  The primary template handles every type that is not
    matched by the partial specialization below and yields `false`. */
template<typename T>
struct is_utf8_view : std::false_type {};

/** Partial specialization matching `text::utf8_view<U>` for any `U`; yields
    `true`. */
template<typename U>
struct is_utf8_view<text::utf8_view<U>> : std::true_type {};

#if BOOST_PARSER_USE_CONCEPTS

template<typename T>
using iterator_t = std::ranges::iterator_t<T>;
template<typename T>
using sentinel_t = std::ranges::sentinel_t<T>;
template<typename T>
using iter_value_t = std::iter_value_t<T>;
template<typename T>
using iter_reference_t = std::iter_reference_t<T>;
template<typename T>
using range_value_t = std::ranges::range_value_t<T>;
template<typename T>
using range_reference_t = std::ranges::range_reference_t<T>;
template<typename T>
using range_rvalue_reference_t = std::ranges::range_rvalue_reference_t<T>;

template<typename T>
constexpr bool is_parsable_code_unit_v = code_unit<T>;

#else

// Fallback definitions of the iterator/range aliases, spelled out with
// decltype and std::iterator_traits.

// Type returned by detail::text::detail::begin() for an lvalue Range.
template<typename Range>
using iterator_t =
    decltype(detail::text::detail::begin(std::declval<Range &>()));

// Type returned by detail::text::detail::end() for an lvalue Range.
template<typename Range>
using sentinel_t =
    decltype(detail::text::detail::end(std::declval<Range &>()));

// value_type reported by std::iterator_traits for Iter.
template<typename Iter>
using iter_value_t = typename std::iterator_traits<Iter>::value_type;

// Type produced by dereferencing an lvalue Iter.
template<typename Iter>
using iter_reference_t = decltype(*std::declval<Iter &>());

// Type produced by applying std::move() to a dereferenced lvalue Iter.
template<typename Iter>
using iter_rvalue_reference_t =
    decltype(std::move(*std::declval<Iter &>()));

// Range-level aliases, each defined via the range's iterator type.
template<typename Range>
using range_value_t = iter_value_t<iterator_t<Range>>;
template<typename Range>
using range_reference_t = iter_reference_t<iterator_t<Range>>;
template<typename Range>
using range_rvalue_reference_t =
    iter_rvalue_reference_t<iterator_t<Range>>;

// True iff T is exactly one of the character types accepted as a code unit:
// char, wchar_t, char16_t, char32_t, and char8_t when the compiler provides
// it.  No cv-qualifiers are stripped at this level.
template<typename T>
constexpr bool is_parsable_code_unit_impl = std::disjunction_v<
    std::is_same<T, char>,
    std::is_same<T, wchar_t>,
#if defined(__cpp_char8_t)
    std::is_same<T, char8_t>,
#endif
    std::is_same<T, char16_t>,
    std::is_same<T, char32_t>>;

// As above, but const/volatile qualifiers on T are ignored.  References are
// not stripped, so e.g. `char &` yields false.
template<typename T>
constexpr bool is_parsable_code_unit_v =
    is_parsable_code_unit_impl<typename std::remove_cv<T>::type>;

#endif

template<typename R>
constexpr auto make_input_subrange(R && r) noexcept
{
using r_t = remove_cv_ref_t<R>;
if constexpr (std::is_pointer_v<r_t>) {
using value_type = iter_value_t<r_t>;
if constexpr (std::is_same_v<value_type, char>) {
return BOOST_PARSER_SUBRANGE(r, text::null_sentinel);
} else {
return r | text::as_utf32;
}
} else {
using value_type = range_value_t<r_t>;
if constexpr (text::detail::is_bounded_array_v<r_t>) {
if constexpr (std::is_same_v<value_type, char>) {
auto first = detail::text::detail::begin(r);
auto last = detail::text::detail::end(r);
if (first != last && !*std::prev(last))
--last;
return BOOST_PARSER_SUBRANGE(first, last);
} else {
return r | text::as_utf32;
}
} else {
if constexpr (
std::is_same_v<value_type, char> &&
!is_utf8_view<r_t>::value) {
return BOOST_PARSER_SUBRANGE(
detail::text::detail::begin(r),
detail::text::detail::end(r));
} else {
return r | text::as_utf32;
}
}
}
}

}

#endif
68 changes: 48 additions & 20 deletions include/boost/parser/lexer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,16 +7,19 @@
#define BOOST_PARSER_LEXER_HPP

#include <boost/parser/config.hpp>
#include <boost/parser/detail/debug_assert.hpp>
#include <boost/parser/detail/hl.hpp>
#include <boost/parser/detail/numeric.hpp>
#include <boost/parser/detail/text/transcode_view.hpp>

#if !BOOST_PARSER_USE_CONCEPTS || !__has_include(<ctre-unicode.hpp>)
#error \
"In order to work, the Boost.Parser lexer requires C++20 and CTRE's ctre-unicode.hpp single-header file in the #include path. CTRE can be found at https://github.com/hanickadot/compile-time-regular-expressions . The required header is at https://raw.githubusercontent.com/hanickadot/compile-time-regular-expressions/refs/heads/main/single-header/ctre-unicode.hpp ."
#endif

#include <boost/parser/concepts.hpp>
#include <boost/parser/detail/debug_assert.hpp>
#include <boost/parser/detail/hl.hpp>
#include <boost/parser/detail/make_input_subrange.hpp>
#include <boost/parser/detail/numeric.hpp>
#include <boost/parser/detail/text/transcode_view.hpp>

#include <ctre-unicode.hpp>

#if defined(BOOST_PARSER_TESTING)
Expand Down Expand Up @@ -213,8 +216,7 @@ namespace boost { namespace parser {
return get_long_long() == rhs.get_long_long();
case detail::token_kind::double_:
return get_double() == rhs.get_double();
default:
BOOST_PARSER_DEBUG_ASSERT(!"Error: invalid token kind.");
default: BOOST_PARSER_DEBUG_ASSERT(!"Error: invalid token kind.");
#if defined(__cpp_lib_unreachable)
std::unreachable();
#endif
Expand Down Expand Up @@ -540,14 +542,15 @@ namespace boost { namespace parser {
new_specs>{};
}

template<typename V>
template<parsable_range V>
static constexpr auto regex_range(V & base)
{
auto r = detail::make_input_subrange(base);
if constexpr (has_ws) {
return ctre::multiline_tokenize<
detail::wrap_escape_concat<regex_str, WsStr>()>(base);
detail::wrap_escape_concat<regex_str, WsStr>()>(r);
} else {
return ctre::multiline_tokenize<regex_str>(base);
return ctre::multiline_tokenize<regex_str>(r);
}
}
};
Expand Down Expand Up @@ -758,7 +761,8 @@ namespace boost { namespace parser {

template<std::ranges::forward_range V, typename Lexer>
requires std::ranges::view<V>
struct token_view : public std::ranges::view_interface<token_view<V, Lexer>>
struct tokens_view
: public std::ranges::view_interface<tokens_view<V, Lexer>>
{
private:
template<bool>
Expand All @@ -775,16 +779,23 @@ namespace boost { namespace parser {
public:
using token_type = typename Lexer::token_type;

token_view()
tokens_view()
requires std::default_initializable<V>
: base_(), lexer_(), tokens_(Lexer::regex_range(base_))
{}
constexpr explicit token_view(V base, Lexer lexer) :
constexpr explicit tokens_view(V base, Lexer lexer) :
base_(std::move(base)),
lexer_(std::move(lexer)),
tokens_(Lexer::regex_range(base_))
{}

// TODO: Document this, and explain that it's due to the way CTRE
// defines its "views" as a pair of iterators.
tokens_view(tokens_view const &) = delete;
tokens_view(tokens_view &&) = delete;
// TODO: Investigate removing the tokens_type member and just making a
// new one when needed in begin()/end().

constexpr V base() const &
requires std::copy_constructible<V>
{
Expand Down Expand Up @@ -836,14 +847,14 @@ namespace boost { namespace parser {
base_iterator_type & base_reference() noexcept { return current_; }
base_iterator_type base_reference() const { return current_; }

token_view * parent_;
tokens_view * parent_;
base_iterator_type current_ = base_iterator_type();

friend token_view::sentinel<Const>;
friend tokens_view::sentinel<Const>;

public:
constexpr iterator() = default;
constexpr iterator(token_view & parent, base_iterator_type it) :
constexpr iterator(tokens_view & parent, base_iterator_type it) :
parent_(&parent), current_(std::move(it))
{}

Expand All @@ -860,8 +871,7 @@ namespace boost { namespace parser {

#if 1
if constexpr (Lexer::has_ws) {
if (auto sv =
parse_results.template get<Lexer::size()>()) {
if (auto sv = parse_results.template get<Lexer::size()>()) {
retval = token_type(ws_id, sv);
return retval; // TODO: Skip this ws token instead.
}
Expand Down Expand Up @@ -897,7 +907,7 @@ namespace boost { namespace parser {
class sentinel
{
private:
using Parent = detail::maybe_const<Const, token_view>;
using Parent = detail::maybe_const<Const, tokens_view>;
using Base = detail::maybe_const<Const, tokens_type>;

public:
Expand Down Expand Up @@ -959,9 +969,27 @@ namespace boost { namespace parser {
};

template<typename R, typename Lexer>
token_view(R &&, Lexer) -> token_view<std::views::all_t<R>, Lexer>;
tokens_view(R &&, Lexer) -> tokens_view<std::views::all_t<R>, Lexer>;

namespace detail {
    // Satisfied when `tokens_view(std::declval<R>(), Lexer())` is a
    // well-formed expression.
    template<typename R, typename Lexer>
    concept can_tokens_view = requires {
        tokens_view(std::declval<R>(), Lexer());
    };

    // Function object used to define `to_tokens`.  Calling it with a range
    // and a lexer yields a `tokens_view` built from the forwarded range and
    // the lexer.
    struct to_tokens_impl
    {
        template<typename R, typename Lexer>
            requires parsable_range<R> && std::ranges::viewable_range<R>
        [[nodiscard]] constexpr auto operator()(R && r, Lexer lexer) const
        {
            return tokens_view(static_cast<R &&>(r), lexer);
        }
    };
}

// TODO: Needs Range adaptor.
/** Range adaptor object for producing a `tokens_view`.  It wraps
    `detail::to_tokens_impl`, whose call operator takes a range and a lexer
    and returns `tokens_view(range, lexer)`.

    TODO: Document the exact call/pipe forms supported here; they are
    provided by `detail::stl_interfaces::adaptor` and should be confirmed
    against it. */
inline constexpr detail::stl_interfaces::adaptor<detail::to_tokens_impl>
to_tokens = detail::to_tokens_impl{};

}}

Expand Down
Loading

0 comments on commit 4ac5244

Please sign in to comment.