From 217076edf8ebc73147e9338be3b1652b1f819ba3 Mon Sep 17 00:00:00 2001 From: Oliver Lee Date: Sat, 21 Oct 2023 20:40:12 -0700 Subject: [PATCH] construct huffman table from symbol spans (#97) --- huffman/BUILD.bazel | 3 + huffman/src/decode.hpp | 3 +- .../detail/flattened_symbol_bitsize_view.hpp | 197 ++++++++++++++++++ huffman/src/detail/is_specialization_of.hpp | 29 +++ huffman/src/encoding.hpp | 4 +- huffman/src/symbol_span.hpp | 57 +++++ huffman/src/table.hpp | 60 +++--- huffman/src/utility.hpp | 6 + .../test/table_from_symbol_bitsize_test.cpp | 95 ++++++++- 9 files changed, 410 insertions(+), 44 deletions(-) create mode 100644 huffman/src/detail/flattened_symbol_bitsize_view.hpp create mode 100644 huffman/src/detail/is_specialization_of.hpp create mode 100644 huffman/src/symbol_span.hpp diff --git a/huffman/BUILD.bazel b/huffman/BUILD.bazel index 2f9a44f..6ada0c0 100644 --- a/huffman/BUILD.bazel +++ b/huffman/BUILD.bazel @@ -8,11 +8,14 @@ cc_library( "src/code.hpp", "src/decode.hpp", "src/detail/element_base_iterator.hpp", + "src/detail/flattened_symbol_bitsize_view.hpp", + "src/detail/is_specialization_of.hpp", "src/detail/iterator_interface.hpp", "src/detail/static_vector.hpp", "src/detail/table_node.hpp", "src/detail/table_storage.hpp", "src/encoding.hpp", + "src/symbol_span.hpp", "src/table.hpp", "src/utility.hpp", ], diff --git a/huffman/src/decode.hpp b/huffman/src/decode.hpp index 0c9b1c5..f9178bf 100644 --- a/huffman/src/decode.hpp +++ b/huffman/src/decode.hpp @@ -2,6 +2,7 @@ #include "huffman/src/bit_span.hpp" #include "huffman/src/code.hpp" #include "huffman/src/table.hpp" +#include "huffman/src/utility.hpp" #include #include @@ -21,7 +22,7 @@ namespace starflate::huffman { /// @tparam Extent The extent of the code table. /// @tparam O The type of the output iterator. template < - std::regular Symbol, + symbol Symbol, std::size_t Extent = std::dynamic_extent, std::output_iterator O> constexpr auto diff --git a/huffman/src/detail/flattened_symbol_bitsize_view.hpp b/huffman/src/detail/flattened_symbol_bitsize_view.hpp new file mode 100644 index 0000000..c8d6cf3 --- /dev/null +++ b/huffman/src/detail/flattened_symbol_bitsize_view.hpp @@ -0,0 +1,197 @@ +#pragma once + +#include "huffman/src/code.hpp" +#include "huffman/src/detail/is_specialization_of.hpp" +#include "huffman/src/symbol_span.hpp" + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace starflate::huffman::detail { + +/// Range adaptor that flattens a view of symbol_span-bitsize ranges to a single +/// view +/// @tparam V random access view of symbol_span-bitsize elements +/// +/// Flattens a view of symbol_span-bitsize elements. Elements of the flattened +/// view are symbol-code pairs. Note that code values set by this view are +/// *unspecified*. It is expected that view is used in construction of a `table` +/// and that valid codes are set with the `canonicalize()` member. +/// +/// Example: +/// ~~~{.cpp} +/// const auto data = std::vector, std::uint8_t>>{ +/// {{'A', 'C'}, 2}, +/// {{'D', 'F'}, 3}, +/// {{'G', 'H'}, 4}, +/// }; +/// +/// const auto elements = +/// flattened_symbol_bitsize_view{std::ranges::ref_view{data}}; +/// +/// for (auto [symbol, code] : elements) { +/// std::cout << symbol << ", " code << '\n'; +/// } +/// +/// // prints +/// // A, 00 +/// // B, 00 +/// // C, 00 +/// // D, 000 +/// // E, 000 +/// // F, 000 +/// // G, 0000 +/// // H, 0000 +/// ~~~ +/// +template + requires std::ranges::random_access_range and + (std::tuple_size_v> == 2) and + is_specialization_of_v< + std::tuple_element_t<0, std::ranges::range_value_t>, + symbol_span> and + std::convertible_to< + std::tuple_element_t<1, std::ranges::range_value_t>, + std::uint8_t> +class flattened_symbol_bitsize_view + : public std::ranges::view_interface> +{ + V base_; + std::size_t size_; + +public: + using base_type = V; + using symbol_span_type = + std::tuple_element_t<0, std::ranges::range_value_t>; + using symbol_type = typename symbol_span_type::symbol_type; + + /// Iterator for flattened_symbol_bitsize_view + /// + class iterator + { + public: + using iterator_category = std::forward_iterator_tag; + using difference_type = std::ranges::range_difference_t; + using reference = std::pair; + using value_type = reference; + using pointer = void; + + private: + const flattened_symbol_bitsize_view* parent_{}; + difference_type outer_index_{}; + std::ranges::range_difference_t inner_index_{}; + + template + static constexpr auto in_range_cast(T t) -> R + { + assert(std::in_range(t)); + return static_cast(t); + } + + public: + /// Default constructor + /// + iterator() + { + // this should never be invoked, but the definition is required + // for weakly_incrementable until GCC and Clang are fixed + // https://en.cppreference.com/w/cpp/iterator/weakly_incrementable + // https://wg21.link/P2325R3 + assert(false); + } + + /// Construct an iterator to a symbol-bitsize element + /// @param parent view containing the symbol_span-bitsize sequence + /// @param outer_index index specifying the symbol_span-bitsize element + /// @param inner_index index within a symbol_span-bitsize element + /// + constexpr iterator( + const flattened_symbol_bitsize_view& parent, + // NOLINTBEGIN(bugprone-easily-swappable-parameters) + std::size_t outer_index, + std::size_t inner_index) + // NOLINTEND(bugprone-easily-swappable-parameters) + : parent_{&parent}, + outer_index_{in_range_cast(outer_index)}, + inner_index_{in_range_cast< + std::ranges::range_difference_t>(inner_index)} + {} + + [[nodiscard]] + constexpr auto + operator*() const -> reference + { + const auto [symbols, bitsize] = parent_->base()[outer_index_]; + return {code{bitsize, {}}, symbols[inner_index_]}; + } + + constexpr auto operator++() & -> iterator& + { + const auto [symbols, _] = parent_->base()[outer_index_]; + + // if we've reached the end of a symbol_span + if (std::cmp_equal(++inner_index_, symbols.size())) { + // advance to the next symbol_span + ++outer_index_; + // and reset the symbol_span index + inner_index_ = {}; + } + + return *this; + } + + constexpr auto operator++(int) -> iterator + { + auto tmp = *this; + ++*this; + return tmp; + } + + friend constexpr auto + operator==(const iterator&, const iterator&) -> bool = default; + }; + + constexpr explicit flattened_symbol_bitsize_view(V base) + : base_{std::move(base)}, + size_{std::accumulate( + std::ranges::cbegin(base_), + std::ranges::cend(base_), + 0UZ, + [](auto acc, const auto& symbol_bitsize) { + const auto& [symbols, _] = symbol_bitsize; + return acc + symbols.size(); + })} + {} + + [[nodiscard]] + constexpr auto base() const -> base_type + { + return base_; + } + + [[nodiscard]] + constexpr auto size() const -> std::size_t + { + return size_; + } + + [[nodiscard]] + constexpr auto begin() const -> iterator + { + return {*this, 0UZ, 0UZ}; + } + + [[nodiscard]] + constexpr auto end() const -> iterator + { + return {*this, base().size(), 0UZ}; + } +}; + +} // namespace starflate::huffman::detail diff --git a/huffman/src/detail/is_specialization_of.hpp b/huffman/src/detail/is_specialization_of.hpp new file mode 100644 index 0000000..3eff501 --- /dev/null +++ b/huffman/src/detail/is_specialization_of.hpp @@ -0,0 +1,29 @@ +#pragma once + +#include + +namespace starflate::huffman::detail { + +/// Checks if a type is a specialization of a class template +/// @tparam T type +/// @tparam primary class template +/// +/// @see https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2020/p2098r1.pdf +/// +/// @{ + +template class primary> +struct is_specialization_of : std::false_type +{}; + +template