Skip to content

Commit

Permalink
construct huffman table from symbol spans
Browse files Browse the repository at this point in the history
Define class template `symbol_span` that represents an inclusive range
from first to last. The `symbol_bitsize` constructor of `huffman::table`
now takes a range of pair-likes of `symbol_span` and bitsize, allowing
the same bitsize to be associated with a contiguous range of symbols.

For example, the static Huffman tree corresponding to the literal/length
alphabet in section 3.2.6 of the DEFLATE RFC can be defined as:

 constexpr auto table =
  huffman::table<std::uint16_t, 288>{
    huffman::symbol_bitsize,
    {{{  0, 143}, 8},
     {{144, 255}, 9},
     {{256, 279}, 7},
     {{280, 287}, 8}}};

resolves #91

Change-Id: I50f7ea63561de0bb785c85467cdb943829b65edf
  • Loading branch information
oliverlee committed Oct 7, 2023
1 parent fb781a1 commit 6a09bb7
Show file tree
Hide file tree
Showing 9 changed files with 350 additions and 36 deletions.
3 changes: 3 additions & 0 deletions huffman/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,14 @@ cc_library(
"src/code.hpp",
"src/decode.hpp",
"src/detail/element_base_iterator.hpp",
"src/detail/flattened_symbol_bitsize_view.hpp",
"src/detail/is_specialization_of.hpp",
"src/detail/iterator_interface.hpp",
"src/detail/static_vector.hpp",
"src/detail/table_node.hpp",
"src/detail/table_storage.hpp",
"src/encoding.hpp",
"src/symbol_span.hpp",
"src/table.hpp",
"src/utility.hpp",
],
Expand Down
3 changes: 2 additions & 1 deletion huffman/src/decode.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
#include "huffman/src/bit_span.hpp"
#include "huffman/src/code.hpp"
#include "huffman/src/table.hpp"
#include "huffman/src/utility.hpp"

#include <iterator>

Expand All @@ -20,7 +21,7 @@ namespace starflate::huffman {
/// @tparam Extent The extent of the code table.
/// @tparam O The type of the output iterator.
template <
std::regular Symbol,
symbol Symbol,
std::size_t Extent = std::dynamic_extent,
std::output_iterator<Symbol> O>
constexpr auto
Expand Down
122 changes: 122 additions & 0 deletions huffman/src/detail/flattened_symbol_bitsize_view.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
#pragma once

#include "huffman/src/detail/iterator_interface.hpp"

#include <cstddef>
#include <iterator>
#include <numeric>
#include <ranges>

namespace starflate::huffman::detail {

/// Presents a range of child ranges as a single flat sequence
///
/// Elements are visited child by child, in order. Both the wrapped view and
/// every child it yields are accessed with `operator[]` and `size()`, so they
/// must support indexed access and report their size. Empty children are
/// skipped.
///
/// @tparam V view whose elements are themselves ranges
///
template <std::ranges::view V>
class flattened_symbol_bitsize_view
    : public std::ranges::view_interface<flattened_symbol_bitsize_view<V>>
{
  V base_;
  std::size_t size_;  // total number of elements over all children

  using child_view_type = std::remove_cvref_t<std::ranges::range_value_t<V>>;

public:
  using base_type = V;

  /// Forward iterator over the flattened elements
  ///
  /// Stores a pointer to the owning view together with the index of the
  /// current child (`offset1_`) and the index inside that child (`offset2_`).
  /// The iterator is therefore only usable while the owning view is alive.
  ///
  class iterator
  {
    const flattened_symbol_bitsize_view* parent_{};
    std::size_t offset1_{};
    std::size_t offset2_{};

    // `begin()` needs to normalise the position it hands out.
    friend class flattened_symbol_bitsize_view;

    /// Advance `offset1_` past empty children
    ///
    /// Stops at the first non-empty child or at the one-past-the-end child
    /// index, which is the position `end()` uses.
    ///
    constexpr auto skip_empty_children() -> void
    {
      using D = difference_type;

      auto outer = parent_->base();
      const auto outer_size = static_cast<std::size_t>(outer.size());

      while (offset1_ != outer_size and
             static_cast<std::size_t>(
                 outer[static_cast<D>(offset1_)].size()) == 0) {
        ++offset1_;
      }
    }

  public:
    using iterator_category = std::forward_iterator_tag;
    using difference_type = std::ranges::range_difference_t<child_view_type>;
    using value_type = std::ranges::range_value_t<child_view_type>;
    using reference = std::ranges::range_reference_t<child_view_type>;
    using pointer = void;

    iterator() = default;

    /// Construct an iterator at a given position
    /// @param parent view being iterated
    /// @param offset1 index of the child range
    /// @param offset2 index of the element inside that child
    ///
    constexpr iterator(
        const flattened_symbol_bitsize_view& parent,
        std::size_t offset1,
        std::size_t offset2)
        : parent_{&parent}, offset1_{offset1}, offset2_{offset2}
    {}

    /// Access the current element
    ///
    [[nodiscard]]
    constexpr auto
    operator*() const -> reference
    {
      using D = difference_type;
      return parent_
          ->base()[static_cast<D>(offset1_)][static_cast<D>(offset2_)];
    }

    /// Step to the next element, moving on to the next non-empty child once
    /// the current child is exhausted
    ///
    constexpr auto operator++() & -> iterator&
    {
      using D = difference_type;

      const auto child_size = static_cast<std::size_t>(
          parent_->base()[static_cast<D>(offset1_)].size());

      if (++offset2_ == child_size) {
        ++offset1_;
        offset2_ = {};
        // Without this, landing on an empty child would leave the iterator
        // at a position that is neither dereferenceable nor equal to end().
        skip_empty_children();
      }

      return *this;
    }

    constexpr auto operator++(int) -> iterator
    {
      auto tmp = *this;
      ++*this;
      return tmp;
    }

    friend constexpr auto
    operator==(const iterator&, const iterator&) -> bool = default;
  };

  /// Wrap a view of child ranges
  /// @param base view to flatten
  ///
  /// The total element count is computed once here by summing the size of
  /// every child.
  ///
  constexpr explicit flattened_symbol_bitsize_view(V base)
      : base_{std::move(base)},
        size_{std::accumulate(
            std::ranges::cbegin(base_),
            std::ranges::cend(base_),
            0UZ,
            [](auto acc, const auto& subrange) {
              return acc + subrange.size();
            })}
  {}

  /// Obtain a copy of the wrapped view
  ///
  [[nodiscard]]
  constexpr auto base() const -> base_type
  {
    return base_;
  }

  /// Total number of elements over all children
  ///
  [[nodiscard]]
  constexpr auto size() const -> std::size_t
  {
    return size_;
  }

  /// Iterator to the first element, or `end()` if there are no elements
  ///
  [[nodiscard]]
  constexpr auto begin() const -> iterator
  {
    auto first = iterator{*this, 0, 0};
    // The first child may be empty; start at the first real element.
    first.skip_empty_children();
    return first;
  }

  /// Iterator one past the last element: child index equal to the number of
  /// children, inner index zero
  ///
  [[nodiscard]]
  constexpr auto end() const -> iterator
  {
    return {*this, static_cast<std::size_t>(base().size()), 0};
  }
};

} // namespace starflate::huffman::detail

// Opt flattened_symbol_bitsize_view into std::ranges::borrowed_range.
//
// flattened_symbol_bitsize_view is only created to adapt an input range during
// construction.
//
// NOTE(review): the view's iterators store a pointer to the view object and
// dereference through it, so they are only valid while that view object is
// alive. Do not rely on this opt-in to keep iterators of a destroyed temporary
// view usable.
template <class V>
inline constexpr bool std::ranges::enable_borrowed_range<
::starflate::huffman::detail::flattened_symbol_bitsize_view<V>> = true;
27 changes: 27 additions & 0 deletions huffman/src/detail/is_specialization_of.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
#pragma once

#include <type_traits>

namespace starflate::huffman::detail {

/// Type trait reporting whether `T` is a specialization of the class template
/// `primary`
///
/// Derives from `std::true_type` when `T` is exactly `primary<Args...>` for
/// some type arguments `Args...`, and from `std::false_type` otherwise.
///
/// @tparam T type to inspect
/// @tparam primary class template taking only type parameters
///
/// @see https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2020/p2098r1.pdf
///
template <class T, template <class...> class primary>
struct is_specialization_of : std::bool_constant<false>
{};

/// Matching case: the inspected type is `primary<Ts...>`
template <template <class...> class primary, class... Ts>
struct is_specialization_of<primary<Ts...>, primary> : std::bool_constant<true>
{};

/// Convenience variable template for `is_specialization_of<T, primary>::value`
template <class T, template <class...> class primary>
inline constexpr bool is_specialization_of_v =
    is_specialization_of<T, primary>{};

} // namespace starflate::huffman::detail
4 changes: 2 additions & 2 deletions huffman/src/encoding.hpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#pragma once

#include "huffman/src/code.hpp"
#include "huffman/src/utility.hpp"

#include <concepts>
#include <ostream>
Expand All @@ -12,8 +13,7 @@ namespace starflate::huffman {
/// This type associates a symbol to a code. It is typically constructed and
/// updated as part of the construction of a table.
///
template <std::regular Symbol>
requires std::totally_ordered<Symbol>
template <symbol Symbol>
struct encoding : code
{
using symbol_type = Symbol;
Expand Down
59 changes: 59 additions & 0 deletions huffman/src/symbol_span.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
#pragma once

#include "huffman/src/utility.hpp"

#include <cassert>
#include <concepts>
#include <ranges>

namespace starflate::huffman {

/// An inclusive span of symbols
///
/// Represents every symbol from `first` to `last`, both included, and can be
/// iterated as a range. Iteration is implemented with a half-open
/// `std::ranges::iota_view` whose bound is the value obtained by incrementing
/// `last` once.
///
/// @tparam S symbol type
///
/// @pre Incrementing `last` must yield a value greater than `last`. A span
///   ending at the largest representable symbol cannot be iterated, because
///   its one-past-the-end bound wraps around.
///
template <symbol S>
class symbol_span : public std::ranges::view_interface<symbol_span<S>>
{
  S first_;
  S last_;

  using range_type = std::ranges::iota_view<S, S>;

  /// Build the half-open range `[first_, last_ + 1)`
  ///
  [[nodiscard]]
  constexpr auto as_range() const -> range_type
  {
    auto bound = last_;
    ++bound;
    // If the increment wrapped, the resulting iota_view would silently be
    // empty or have an unreachable bound; fail loudly instead.
    assert(not(bound <= last_));
    return range_type{first_, bound};
  }

public:
  using symbol_type = S;

  using iterator = std::ranges::iterator_t<range_type>;

  /// Construct a symbol span of a single symbol
  /// @param first symbol
  ///
  /// Not `explicit`: a single symbol converts implicitly to a one-element
  /// span.
  ///
  constexpr symbol_span(symbol_type first) : symbol_span{first, first} {}

  /// Construct a symbol span from first to last, inclusive
  /// @param first, last inclusive symbol range
  /// @pre first <= last
  ///
  constexpr symbol_span(symbol_type first, symbol_type last)
      : first_{first}, last_{last}
  {
    assert(first <= last);
  }

  /// Iterator to the first symbol of the span
  ///
  [[nodiscard]]
  constexpr auto begin() const -> iterator
  {
    return as_range().begin();
  }

  /// Iterator one past the last symbol of the span
  ///
  [[nodiscard]]
  constexpr auto end() const -> iterator
  {
    return as_range().end();
  }
};

} // namespace starflate::huffman
63 changes: 38 additions & 25 deletions huffman/src/table.hpp
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
#pragma once

#include "huffman/src/detail/element_base_iterator.hpp"
#include "huffman/src/detail/flattened_symbol_bitsize_view.hpp"
#include "huffman/src/detail/is_specialization_of.hpp"
#include "huffman/src/detail/table_node.hpp"
#include "huffman/src/detail/table_storage.hpp"
#include "huffman/src/symbol_span.hpp"
#include "huffman/src/utility.hpp"

#include <algorithm>
Expand Down Expand Up @@ -55,10 +58,19 @@ constexpr static auto find_node_if(I first, I last, P pred)
template <class R>
constexpr auto to_code_symbol(const R& rng)
{
return std::views::transform(rng, [](const auto& elem) {
const auto& [symbol, bitsize] = elem;
return std::tuple{code{bitsize, {}}, symbol};
});
static constexpr auto convert_with = [](auto bitsize) {
return [bitsize](auto symbol) {
return std::tuple{code{bitsize, {}}, symbol};
};
};

static constexpr auto expand = [](const auto& elem) {
const auto& [symbols, bitsize] = elem;
static_assert(std::ranges::view<std::remove_cvref_t<decltype(symbols)>>);
return std::views::transform(symbols, convert_with(bitsize));
};

return flattened_symbol_bitsize_view{std::views::transform(rng, expand)};
}

} // namespace detail
Expand All @@ -74,8 +86,7 @@ constexpr auto to_code_symbol(const R& rng)
/// `std::array` is used to store the Huffman tree, with the size determined by
/// `Extent`.
///
template <std::regular Symbol, std::size_t Extent = std::dynamic_extent>
requires std::totally_ordered<Symbol>
template <symbol Symbol, std::size_t Extent = std::dynamic_extent>
class table
{
using node_type = detail::table_node<Symbol>;
Expand Down Expand Up @@ -232,11 +243,6 @@ class table
///
/// @{

template <std::ranges::input_range R>
requires std::convertible_to<std::ranges::range_reference_t<R>, symbol_type>
constexpr explicit table(const R& data) : table{data, {}}
{}

template <std::ranges::input_range R>
requires std::convertible_to<std::ranges::range_reference_t<R>, symbol_type>
constexpr explicit table(const R& data, std::optional<symbol_type> eot)
Expand All @@ -246,6 +252,11 @@ class table
set_skip_fields();
}

template <std::ranges::input_range R>
requires std::convertible_to<std::ranges::range_reference_t<R>, symbol_type>
constexpr explicit table(const R& data) : table{data, {}}
{}

/// @}

/// Constructs a `table` from the given code-symbol mapping contents
Expand Down Expand Up @@ -275,10 +286,8 @@ class table
template <std::size_t N>
constexpr table(
table_contents_tag, const c_array<std::pair<code, symbol_type>, N>& map)
: table_{table_contents, map}
{
set_skip_fields();
}
: table{table_contents, std::ranges::ref_view{map}}
{}

/// @}

Expand All @@ -291,10 +300,10 @@ class table
///
/// @{

template <std::ranges::sized_range R>
template <std::ranges::random_access_range R>
requires std::convertible_to<
std::ranges::range_reference_t<R>,
std::tuple<symbol_type, std::uint8_t>>
std::tuple<symbol_span<symbol_type>, std::uint8_t>>
constexpr table(symbol_bitsize_tag, const R& map)
: table{table_contents, detail::to_code_symbol(map)}
{
Expand All @@ -304,11 +313,9 @@ class table
template <std::size_t N>
constexpr table(
symbol_bitsize_tag,
const c_array<std::pair<symbol_type, std::uint8_t>, N>& map)
: table{table_contents, detail::to_code_symbol(map)}
{
canonicalize();
}
const c_array<std::pair<symbol_span<symbol_type>, std::uint8_t>, N>& map)
: table{symbol_bitsize, std::ranges::ref_view{map}}
{}

/// @}

Expand Down Expand Up @@ -508,11 +515,17 @@ table(table_contents_tag, const R&)
-> table<detail::tuple_arg_t<1, R>, detail::tuple_size_v<R>()>;

template <class S, class I, std::size_t N>
requires (not detail::is_specialization_of_v<S, symbol_span>)
table(symbol_bitsize_tag, const c_array<std::pair<S, I>, N>&) -> table<S, N>;

template <class R>
requires (detail::tuple_size_v<std::ranges::range_value_t<R>>() == 2)
table(symbol_bitsize_tag, const R&)
-> table<detail::tuple_arg_t<0, R>, detail::tuple_size_v<R>()>;
requires (
detail::tuple_size_v<std::ranges::range_value_t<R>>() == 2 and
detail::is_specialization_of_v<
std::tuple_element_t<0, std::ranges::range_value_t<R>>,
symbol_span>)
table(symbol_bitsize_tag, const R&) -> table<
typename detail::tuple_arg_t<0, R>::symbol_type,
detail::tuple_size_v<R>()>;

} // namespace starflate::huffman
Loading

0 comments on commit 6a09bb7

Please sign in to comment.