Skip to content

Commit

Permalink
Add a range adaptor for token_view; token_view -> tokens_view. Break
Browse files Browse the repository at this point in the history
detail::make_input_subrange() out into its own header for reuse.

See #202.
  • Loading branch information
tzlaine committed Nov 5, 2024
1 parent eaa53d5 commit f5f77d8
Show file tree
Hide file tree
Showing 4 changed files with 157 additions and 119 deletions.
114 changes: 114 additions & 0 deletions include/boost/parser/detail/make_input_subrange.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
// Copyright (C) 2024 T. Zachary Laine
//
// Distributed under the Boost Software License, Version 1.0. (See
// accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)
#ifndef BOOST_PARSER_DETAIL_MAKE_INPUT_SUBRANGE_HPP
#define BOOST_PARSER_DETAIL_MAKE_INPUT_SUBRANGE_HPP

#include <boost/parser/config.hpp>

namespace boost::parser::detail {

// Type trait: derives from a true bool_constant only for specializations of
// text::utf8_view, and from a false one for every other type.
template<typename T>
struct is_utf8_view : std::bool_constant<false>
{};
// Partial specialization selected for text::utf8_view<View>, for any View.
template<typename View>
struct is_utf8_view<text::utf8_view<View>> : std::bool_constant<true>
{};

#if BOOST_PARSER_USE_CONCEPTS

// Concepts build: each helper alias is a thin spelling of the matching
// standard-library alias, so the rest of detail:: can use one set of names
// regardless of which branch of BOOST_PARSER_USE_CONCEPTS is active.

// Range -> iterator / sentinel types.
template<typename R>
using iterator_t = std::ranges::iterator_t<R>;
template<typename R>
using sentinel_t = std::ranges::sentinel_t<R>;

// Iterator -> value / reference types.
template<typename I>
using iter_value_t = std::iter_value_t<I>;
template<typename I>
using iter_reference_t = std::iter_reference_t<I>;

// Range -> value / reference / rvalue-reference types.
template<typename R>
using range_value_t = std::ranges::range_value_t<R>;
template<typename R>
using range_reference_t = std::ranges::range_reference_t<R>;
template<typename R>
using range_rvalue_reference_t = std::ranges::range_rvalue_reference_t<R>;

// In this branch, "parsable code unit" is defined directly by code_unit<U>
// (code_unit is declared elsewhere in the library).
template<typename U>
constexpr bool is_parsable_code_unit_v = code_unit<U>;

#else

// Non-concepts build: the type produced by detail::text::detail::begin()
// when applied to an lvalue of type R.
template<typename R>
using iterator_t =
    decltype(detail::text::detail::begin(std::declval<R &>()));
// The type produced by detail::text::detail::end() when applied to an
// lvalue of type R.
template<typename R>
using sentinel_t =
    decltype(detail::text::detail::end(std::declval<R &>()));
// Value type of iterator I, as reported by std::iterator_traits.
template<typename I>
using iter_value_t = typename std::iterator_traits<I>::value_type;
// Type of the expression *it, for an lvalue `it` of type I.
template<typename I>
using iter_reference_t = decltype(*std::declval<I &>());
// Type of the expression std::move(*it), for an lvalue `it` of type I.
template<typename I>
using iter_rvalue_reference_t = decltype(std::move(*std::declval<I &>()));
// Range-level aliases, each defined as the corresponding iterator-level
// alias applied to the range's iterator type (iterator_t<R>).
template<typename R>
using range_value_t = iter_value_t<iterator_t<R>>;
template<typename R>
using range_reference_t = iter_reference_t<iterator_t<R>>;
template<typename R>
using range_rvalue_reference_t = iter_rvalue_reference_t<iterator_t<R>>;

// True when U is exactly one of the built-in character types: char,
// wchar_t, char8_t (only when the compiler provides it), char16_t, or
// char32_t.  No cv-qualifiers are stripped at this level.
template<typename U>
constexpr bool is_parsable_code_unit_impl = std::disjunction_v<
    std::is_same<U, char>,
    std::is_same<U, wchar_t>,
#if defined(__cpp_char8_t)
    std::is_same<U, char8_t>,
#endif
    std::is_same<U, char16_t>,
    std::is_same<U, char32_t>>;

// Same test as above, applied after removing top-level const/volatile.
template<typename U>
constexpr bool is_parsable_code_unit_v =
    is_parsable_code_unit_impl<typename std::remove_cv<U>::type>;

#endif

template<typename R>
constexpr auto make_input_subrange(R && r) noexcept
{
using r_t = remove_cv_ref_t<R>;
if constexpr (std::is_pointer_v<r_t>) {
using value_type = iter_value_t<r_t>;
if constexpr (std::is_same_v<value_type, char>) {
return BOOST_PARSER_SUBRANGE(r, text::null_sentinel);
} else {
return r | text::as_utf32;
}
} else {
using value_type = range_value_t<r_t>;
if constexpr (text::detail::is_bounded_array_v<r_t>) {
if constexpr (std::is_same_v<value_type, char>) {
auto first = detail::text::detail::begin(r);
auto last = detail::text::detail::end(r);
if (first != last && !*std::prev(last))
--last;
return BOOST_PARSER_SUBRANGE(first, last);
} else {
return r | text::as_utf32;
}
} else {
if constexpr (
std::is_same_v<value_type, char> &&
!is_utf8_view<r_t>::value) {
return BOOST_PARSER_SUBRANGE(
detail::text::detail::begin(r),
detail::text::detail::end(r));
} else {
return r | text::as_utf32;
}
}
}
}

}

#endif
54 changes: 37 additions & 17 deletions include/boost/parser/lexer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,16 +7,19 @@
#define BOOST_PARSER_LEXER_HPP

#include <boost/parser/config.hpp>
#include <boost/parser/detail/debug_assert.hpp>
#include <boost/parser/detail/hl.hpp>
#include <boost/parser/detail/numeric.hpp>
#include <boost/parser/detail/text/transcode_view.hpp>

#if !BOOST_PARSER_USE_CONCEPTS || !__has_include(<ctre-unicode.hpp>)
#error \
"In order to work, the Boost.Parser lexer requires C++20 and CTRE's ctre-unicode.hpp single-header file in the #include path. CTRE can be found at https://github.com/hanickadot/compile-time-regular-expressions . The required header is at https://raw.githubusercontent.com/hanickadot/compile-time-regular-expressions/refs/heads/main/single-header/ctre-unicode.hpp ."
#endif

#include <boost/parser/concepts.hpp>
#include <boost/parser/detail/debug_assert.hpp>
#include <boost/parser/detail/hl.hpp>
#include <boost/parser/detail/make_input_subrange.hpp>
#include <boost/parser/detail/numeric.hpp>
#include <boost/parser/detail/text/transcode_view.hpp>

#include <ctre-unicode.hpp>

#if defined(BOOST_PARSER_TESTING)
Expand Down Expand Up @@ -218,8 +221,7 @@ namespace boost { namespace parser {
return get_long_long() == rhs.get_long_long();
case detail::token_kind::double_:
return get_double() == rhs.get_double();
default:
BOOST_PARSER_DEBUG_ASSERT(!"Error: invalid token kind.");
default: BOOST_PARSER_DEBUG_ASSERT(!"Error: invalid token kind.");
#if defined(__cpp_lib_unreachable)
std::unreachable();
#endif
Expand Down Expand Up @@ -765,7 +767,8 @@ namespace boost { namespace parser {

template<std::ranges::forward_range V, typename Lexer>
requires std::ranges::view<V>
struct token_view : public std::ranges::view_interface<token_view<V, Lexer>>
struct tokens_view
: public std::ranges::view_interface<tokens_view<V, Lexer>>
{
private:
template<bool>
Expand All @@ -782,11 +785,11 @@ namespace boost { namespace parser {
public:
using token_type = typename Lexer::token_type;

token_view()
tokens_view()
requires std::default_initializable<V>
: base_(), lexer_(), tokens_(Lexer::regex_range(base_))
{}
constexpr explicit token_view(V base, Lexer lexer) :
constexpr explicit tokens_view(V base, Lexer lexer) :
base_(std::move(base)),
lexer_(std::move(lexer)),
tokens_(Lexer::regex_range(base_))
Expand Down Expand Up @@ -843,14 +846,14 @@ namespace boost { namespace parser {
base_iterator_type & base_reference() noexcept { return current_; }
base_iterator_type base_reference() const { return current_; }

token_view * parent_;
tokens_view * parent_;
base_iterator_type current_ = base_iterator_type();

friend token_view::sentinel<Const>;
friend tokens_view::sentinel<Const>;

public:
constexpr iterator() = default;
constexpr iterator(token_view & parent, base_iterator_type it) :
constexpr iterator(tokens_view & parent, base_iterator_type it) :
parent_(&parent), current_(std::move(it))
{}

Expand All @@ -867,8 +870,7 @@ namespace boost { namespace parser {

#if 1
if constexpr (Lexer::has_ws) {
if (auto sv =
parse_results.template get<Lexer::size()>()) {
if (auto sv = parse_results.template get<Lexer::size()>()) {
retval = token_type(ws_id, sv);
return retval; // TODO: Skip this ws token instead.
}
Expand Down Expand Up @@ -904,7 +906,7 @@ namespace boost { namespace parser {
class sentinel
{
private:
using Parent = detail::maybe_const<Const, token_view>;
using Parent = detail::maybe_const<Const, tokens_view>;
using Base = detail::maybe_const<Const, tokens_type>;

public:
Expand Down Expand Up @@ -966,9 +968,27 @@ namespace boost { namespace parser {
};

template<typename R, typename Lexer>
token_view(R &&, Lexer) -> token_view<std::views::all_t<R>, Lexer>;
tokens_view(R &&, Lexer) -> tokens_view<std::views::all_t<R>, Lexer>;

// TODO: Needs Range adaptor.
namespace detail {
    // Satisfied when the expression tokens_view(std::declval<R>(), Lexer())
    // is well-formed, i.e. a tokens_view can be formed from an R and a
    // default-constructed Lexer.
    template<typename R, typename Lexer>
    concept can_tokens_view =
        requires { tokens_view(std::declval<R>(), Lexer()); };

    // Function object wrapped by the to_tokens adaptor.  Given a parsable,
    // viewable range `r` and a lexer, it returns a tokens_view over
    // detail::make_input_subrange(r).
    struct to_tokens_impl
    {
        template<parsable_range R, typename Lexer>
            requires std::ranges::viewable_range<R>
        [[nodiscard]] constexpr auto operator()(R && r, Lexer lexer) const
        {
            // std::forward replaces the C-style cast (R &&)r; `lexer` is a
            // by-value sink parameter, so it is moved rather than copied
            // into the view.
            // NOTE(review): confirm that an rvalue, non-view range cannot
            // reach this point; if make_input_subrange() returns iterators
            // into such an argument, the resulting view would dangle.
            return tokens_view(
                detail::make_input_subrange(std::forward<R>(r)),
                std::move(lexer));
        }
    };
}

/** The range adaptor object for `tokens_view`.  It wraps a
    `detail::to_tokens_impl`, whose call operator takes a range and a lexer
    and returns a `tokens_view` over that input.

    NOTE(review): presumably this also supports pipe syntax through
    `detail::stl_interfaces::adaptor`; confirm against that template.

    TODO: Expand with usage examples. */
inline constexpr detail::stl_interfaces::adaptor<detail::to_tokens_impl>
to_tokens = detail::to_tokens_impl{};

}}

Expand Down
100 changes: 2 additions & 98 deletions include/boost/parser/parser.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
#include <boost/parser/detail/numeric.hpp>
#include <boost/parser/detail/case_fold.hpp>
#include <boost/parser/detail/unicode_char_sets.hpp>
#include <boost/parser/detail/make_input_subrange.hpp>
#include <boost/parser/detail/pp_for_each.hpp>
#include <boost/parser/detail/printing.hpp>

Expand Down Expand Up @@ -1033,13 +1034,6 @@ namespace boost { namespace parser {
struct is_one_plus_p<one_plus_parser<T>> : std::true_type
{};

template<typename T>
struct is_utf8_view : std::false_type
{};
template<typename V>
struct is_utf8_view<text::utf8_view<V>> : std::true_type
{};

template<typename T>
using optional_type = remove_cv_ref_t<decltype(*std::declval<T &>())>;

Expand All @@ -1061,48 +1055,7 @@ namespace boost { namespace parser {
using has_push_back =
decltype(std::declval<T &>().push_back(*std::declval<T>().begin()));

#if BOOST_PARSER_USE_CONCEPTS

template<typename T>
using iterator_t = std::ranges::iterator_t<T>;
template<typename T>
using sentinel_t = std::ranges::sentinel_t<T>;
template<typename T>
using iter_value_t = std::iter_value_t<T>;
template<typename T>
using iter_reference_t = std::iter_reference_t<T>;
template<typename T>
using range_value_t = std::ranges::range_value_t<T>;
template<typename T>
using range_reference_t = std::ranges::range_reference_t<T>;
template<typename T>
using range_rvalue_reference_t =
std::ranges::range_rvalue_reference_t<T>;

template<typename T>
constexpr bool is_parsable_code_unit_v = code_unit<T>;

#else

template<typename T>
using iterator_t =
decltype(detail::text::detail::begin(std::declval<T &>()));
template<typename Range>
using sentinel_t =
decltype(detail::text::detail::end(std::declval<Range &>()));
template<typename T>
using iter_value_t = typename std::iterator_traits<T>::value_type;
template<typename T>
using iter_reference_t = decltype(*std::declval<T &>());
template<typename T>
using iter_rvalue_reference_t =
decltype(std::move(*std::declval<T &>()));
template<typename T>
using range_value_t = iter_value_t<iterator_t<T>>;
template<typename T>
using range_reference_t = iter_reference_t<iterator_t<T>>;
template<typename T>
using range_rvalue_reference_t = iter_rvalue_reference_t<iterator_t<T>>;
#if !BOOST_PARSER_USE_CONCEPTS

template<typename T>
using has_insert = decltype(std::declval<T &>().insert(
Expand All @@ -1122,18 +1075,6 @@ namespace boost { namespace parser {
(std::is_same_v<detected_t<range_value_t, T>, U> ||
(std::is_same_v<T, std::string> && std::is_same_v<U, char32_t>));

template<typename T>
constexpr bool is_parsable_code_unit_impl =
std::is_same_v<T, char> || std::is_same_v<T, wchar_t> ||
#if defined(__cpp_char8_t)
std::is_same_v<T, char8_t> ||
#endif
std::is_same_v<T, char16_t> || std::is_same_v<T, char32_t>;

template<typename T>
constexpr bool is_parsable_code_unit_v =
is_parsable_code_unit_impl<std::remove_cv_t<T>>;

template<typename T>
constexpr bool is_parsable_iter_v = is_parsable_code_unit_v<
remove_cv_ref_t<detected_t<iter_value_t, T>>>;
Expand Down Expand Up @@ -2611,43 +2552,6 @@ namespace boost { namespace parser {
}
}

template<typename R>
constexpr auto make_input_subrange(R && r) noexcept
{
using r_t = remove_cv_ref_t<R>;
if constexpr (std::is_pointer_v<r_t>) {
using value_type = iter_value_t<r_t>;
if constexpr (std::is_same_v<value_type, char>) {
return BOOST_PARSER_SUBRANGE(r, text::null_sentinel);
} else {
return r | text::as_utf32;
}
} else {
using value_type = range_value_t<r_t>;
if constexpr (text::detail::is_bounded_array_v<r_t>) {
if constexpr (std::is_same_v<value_type, char>) {
auto first = detail::text::detail::begin(r);
auto last = detail::text::detail::end(r);
if (first != last && !*std::prev(last))
--last;
return BOOST_PARSER_SUBRANGE(first, last);
} else {
return r | text::as_utf32;
}
} else {
if constexpr (
std::is_same_v<value_type, char> &&
!is_utf8_view<r_t>::value) {
return BOOST_PARSER_SUBRANGE(
detail::text::detail::begin(r),
detail::text::detail::end(r));
} else {
return r | text::as_utf32;
}
}
}
}

template<typename R>
constexpr auto make_view_begin(R & r) noexcept
{
Expand Down
Loading

0 comments on commit f5f77d8

Please sign in to comment.