Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add huffman table constructor from symbol-bitsize range #96

Merged
merged 1 commit into from
Oct 13, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions huffman/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ cc_library(
"src/detail/table_storage.hpp",
"src/encoding.hpp",
"src/table.hpp",
"src/utility.hpp",
],
hdrs = ["huffman.hpp"],
visibility = ["//:__subpackages__"],
Expand Down
24 changes: 4 additions & 20 deletions huffman/src/detail/table_storage.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
#include "huffman/src/code.hpp"
#include "huffman/src/detail/static_vector.hpp"
#include "huffman/src/encoding.hpp"
#include "huffman/src/utility.hpp"

#include <algorithm>
#include <cassert>
Expand All @@ -13,18 +14,7 @@
#include <type_traits>
#include <vector>

namespace starflate::huffman {

/// Disambiguation tag to specify a table is constructed with a code-symbol
/// mapping
///
struct table_contents_tag
{
explicit table_contents_tag() = default;
};
inline constexpr auto table_contents = table_contents_tag{};

namespace detail {
namespace starflate::huffman::detail {

struct frequency_tag
{
Expand Down Expand Up @@ -55,7 +45,6 @@ class table_storage : table_storage_base_t<IntrusiveNode, Extent>
template <class R>
constexpr table_storage(
frequency_tag, const R& frequencies, std::optional<symbol_type> eot)
: base_type{}
{
base_type::reserve(
std::ranges::size(frequencies) + std::size_t{eot.has_value()});
Expand All @@ -74,7 +63,6 @@ class table_storage : table_storage_base_t<IntrusiveNode, Extent>
template <class R>
constexpr table_storage(
data_tag, const R& data, std::optional<symbol_type> eot)
: base_type{}
{
if (eot) {
base_type::emplace_back(*eot, 1UZ);
Expand All @@ -97,7 +85,7 @@ class table_storage : table_storage_base_t<IntrusiveNode, Extent>
}

template <class R>
constexpr table_storage(table_contents_tag, const R& map) : base_type{}
constexpr table_storage(table_contents_tag, const R& map)
{
const auto as_code = [](auto& node) -> auto& {
return static_cast<code&>(node);
Expand All @@ -116,9 +104,6 @@ class table_storage : table_storage_base_t<IntrusiveNode, Extent>
as_symbol(*it) = s;
++it;
}

assert(std::ranges::unique(*this, {}, as_code).empty());
assert(std::ranges::unique(*this, {}, as_symbol).empty());
}

using base_type::begin;
Expand All @@ -131,5 +116,4 @@ class table_storage : table_storage_base_t<IntrusiveNode, Extent>
using base_type::size;
};

} // namespace detail
} // namespace starflate::huffman
} // namespace starflate::huffman::detail
132 changes: 94 additions & 38 deletions huffman/src/table.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
#include "huffman/src/detail/element_base_iterator.hpp"
#include "huffman/src/detail/table_node.hpp"
#include "huffman/src/detail/table_storage.hpp"
#include "huffman/src/utility.hpp"

#include <algorithm>
#include <compare>
Expand All @@ -22,8 +23,45 @@

namespace starflate::huffman {

template <class T, std::size_t N>
using c_array = T[N];
namespace detail {

/// Converts an unsigned integer to the signed integer type `S`
/// @tparam S signed integer type to convert to
/// @tparam U unsigned integer type of the argument (deduced)
/// @param uint value to convert
/// @return `uint` as a value of type `S`
/// @pre `uint` is representable as `S` (checked with `assert`)
///
template <std::signed_integral S, std::unsigned_integral U>
static constexpr auto to_signed(U uint)
{
  // `uint` is never negative, so the range check can only fail on the upper
  // bound of `S`
  assert(std::in_range<S>(uint));
  return static_cast<S>(uint);
}

/// Finds the next internal node that satisfies a predicate
///
/// The range is walked node by node, not element by element: each step
/// advances the iterator by the `node_size()` of the node it points to.
/// @param first iterator to the first node to inspect
/// @param last sentinel position at which the walk stops
/// @param pred unary predicate applied to each visited node
/// @return iterator to the first visited node for which `pred` is true, or
///     `last` if the walk reaches `last` first
///
template <std::random_access_iterator I, std::indirect_unary_predicate<I> P>
constexpr static auto find_node_if(I first, I last, P pred)
{
  using difference_type = std::iter_difference_t<I>;

  while (first != last && !pred(*first)) {
    // skip over every element that belongs to the current node
    first += to_signed<difference_type>(first->node_size());
  }

  return first;
}

/// Adapts a symbol-bitsize range into a code-symbol range
///
/// Each `(symbol, bitsize)` element is mapped to a
/// `std::tuple{code{bitsize, {}}, symbol}`. The mapping is applied lazily
/// through `std::views::transform`.
/// @tparam R range whose elements decompose into a symbol and a bitsize
/// @param rng range of symbol-bitsize elements
/// @return view of code-symbol tuples
///
template <class R>
constexpr auto to_code_symbol(const R& rng)
{
  const auto as_code_symbol = [](const auto& elem) {
    const auto& [symbol, bitsize] = elem;
    return std::tuple{code{bitsize, {}}, symbol};
  };

  return rng | std::views::transform(as_code_symbol);
}

} // namespace detail

/// Huffman code table
/// @tparam Symbol symbol type
Expand All @@ -44,29 +82,9 @@ class table

detail::table_storage<node_type, Extent> table_;

template <std::unsigned_integral U>
static constexpr auto to_index(U uint)
{
using S = std::ranges::range_difference_t<decltype(table_)>;

assert(std::cmp_less_equal(uint, std::numeric_limits<S>::max()));
return static_cast<S>(uint);
}

template <std::forward_iterator I, std::indirect_unary_predicate<I> P>
constexpr static auto find_node_if(I first, I last, P pred)
{
for (; first != last; first += to_index(first->node_size())) {
if (pred(*first)) {
break;
}
}

return first;
}

constexpr auto encode_symbols() -> void
{
using S = std::ranges::range_difference_t<decltype(table_)>;
auto reversed = std::views::reverse(table_);

// precondition, audit
Expand All @@ -77,13 +95,13 @@ class table
const auto last = reversed.end();

while (first->node_size() != total_size) {
join_reversed(first[0], first[to_index(first->node_size())]);
join_reversed(first[0], first[detail::to_signed<S>(first->node_size())]);

const auto has_higher_freq = [&first](const auto& n) {
return n.frequency() > first->frequency();
};

auto lower = first + to_index(first->node_size());
auto lower = first + detail::to_signed<S>(first->node_size());
auto upper = find_node_if(lower, last, has_higher_freq);

// re-sort after creating a new internal node
Expand Down Expand Up @@ -147,13 +165,13 @@ class table
}

public:
/// Code point type
/// Symbol type
///
using encoding_type = encoding<Symbol>;
using symbol_type = Symbol;

/// Symbol type
/// Code point type
///
using symbol_type = typename encoding_type::symbol_type;
using encoding_type = encoding<symbol_type>;

/// Const iterator type
///
Expand All @@ -174,13 +192,6 @@ class table
///
/// @{

template <std::ranges::sized_range R>
requires std::convertible_to<
std::ranges::range_value_t<R>,
std::tuple<symbol_type, std::size_t>>
constexpr explicit table(const R& frequencies) : table{frequencies, {}}
{}

template <std::ranges::sized_range R>
requires std::convertible_to<
std::ranges::range_value_t<R>,
Expand All @@ -192,6 +203,13 @@ class table
set_skip_fields();
}

template <std::ranges::sized_range R>
requires std::convertible_to<
std::ranges::range_value_t<R>,
std::tuple<symbol_type, std::size_t>>
constexpr explicit table(const R& frequencies) : table{frequencies, {}}
{}

template <std::integral I, auto N>
constexpr explicit table(
const c_array<std::pair<symbol_type, I>, N>& frequencies)
Expand Down Expand Up @@ -249,21 +267,51 @@ class table
std::tuple_element_t<1, std::ranges::range_value_t<R>>,
symbol_type>)
constexpr table(table_contents_tag, const R& map)
: table_{table_contents_tag{}, map}
: table_{table_contents, map}
{
set_skip_fields();
}

template <std::size_t N>
constexpr table(
table_contents_tag, const c_array<std::pair<code, symbol_type>, N>& map)
: table_{table_contents_tag{}, map}
: table_{table_contents, map}
{
set_skip_fields();
}

/// @}

/// Constructs a `table` from a symbol-bitsize mapping
/// @tparam R sized-range of symbol-bitsize tuple-likes
/// @param map range of symbol-bitsize tuple-likes
/// @pre all symbols are unique
/// @pre the number of symbols with the same bitsize does not exceed the
/// available number of prefix free codes with that bitsize
///
/// Both overloads delegate to the code-symbol (`table_contents`) constructor,
/// passing the range produced by `detail::to_code_symbol(map)`, and then call
/// `canonicalize()`.
/// NOTE(review): `canonicalize()` is defined outside this view; presumably it
/// assigns the final code values from the bitsizes - confirm.
///
/// @{

// Overload for any sized range whose reference type converts to
// `std::tuple<symbol_type, std::uint8_t>`.
template <std::ranges::sized_range R>
requires std::convertible_to<
std::ranges::range_reference_t<R>,
std::tuple<symbol_type, std::uint8_t>>
constexpr table(symbol_bitsize_tag, const R& map)
: table{table_contents, detail::to_code_symbol(map)}
{
canonicalize();
}

// Overload for a C-style array of `std::pair<symbol_type, std::uint8_t>`.
template <std::size_t N>
constexpr table(
symbol_bitsize_tag,
const c_array<std::pair<symbol_type, std::uint8_t>, N>& map)
: table{table_contents, detail::to_code_symbol(map)}
{
canonicalize();
}

/// @}

/// Returns an iterator to the first `encoding`
///
/// @note elements are ordered by code bitsize. If multiple elements have the
Expand Down Expand Up @@ -459,4 +507,12 @@ template <class R>
table(table_contents_tag, const R&)
-> table<detail::tuple_arg_t<1, R>, detail::tuple_size_v<R>()>;

/// Deduction guide for construction from a C-style array of symbol-bitsize
/// pairs: the symbol type is the pair's first type and the extent is the array
/// length
///
template <class S, class I, std::size_t N>
table(symbol_bitsize_tag, const c_array<std::pair<S, I>, N>&) -> table<S, N>;

/// Deduction guide for construction from a range of symbol-bitsize
/// tuple-likes; only participates when the range's value type has a tuple
/// size of 2
/// NOTE(review): the deduced symbol type and extent come from
/// `detail::tuple_arg_t<0, R>` and `detail::tuple_size_v<R>()`, which are
/// defined outside this view - confirm their behavior for non-array ranges.
///
template <class R>
requires (detail::tuple_size_v<std::ranges::range_value_t<R>>() == 2)
table(symbol_bitsize_tag, const R&)
-> table<detail::tuple_arg_t<0, R>, detail::tuple_size_v<R>()>;

} // namespace starflate::huffman
30 changes: 30 additions & 0 deletions huffman/src/utility.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
#pragma once

#include <cstddef>

namespace starflate::huffman {

/// Convenience alias for a C-style array
///
/// `c_array<T, N>` names the type `T[N]`, so a reference-to-array parameter
/// can be written `const c_array<T, N>&` instead of `const T (&)[N]`.
/// @tparam T element type
/// @tparam N number of elements
///
template <class T, std::size_t N>
using c_array = T[N];

/// Tag type selecting construction of a table from a code-symbol mapping
///
/// The default constructor is explicit, so the tag must be named (or the
/// `table_contents` constant used); a bare `{}` argument does not convert to
/// it.
///
struct table_contents_tag
{
  explicit table_contents_tag() = default;
};

/// Tag value to pass when constructing a table from a code-symbol mapping
///
inline constexpr table_contents_tag table_contents{};

/// Tag type selecting construction of a table from a symbol-bitsize mapping
///
/// The default constructor is explicit, so the tag must be named (or the
/// `symbol_bitsize` constant used); a bare `{}` argument does not convert to
/// it.
///
struct symbol_bitsize_tag
{
  explicit symbol_bitsize_tag() = default;
};

/// Tag value to pass when constructing a table from a symbol-bitsize mapping
///
inline constexpr symbol_bitsize_tag symbol_bitsize{};

} // namespace starflate::huffman
22 changes: 16 additions & 6 deletions huffman/test/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -21,19 +21,29 @@ cc_test(
)

cc_test(
name = "table_from_frequencies_test",
name = "table_canonicalize_test",
timeout = "short",
srcs = ["table_from_frequencies_test.cpp"],
srcs = ["table_canonicalize_test.cpp"],
deps = [
"//huffman",
"@boost_ut",
],
)

cc_test(
name = "table_canonicalize_test",
name = "table_find_code_test",
timeout = "short",
srcs = ["table_canonicalize_test.cpp"],
srcs = ["table_find_code_test.cpp"],
deps = [
"//huffman",
"@boost_ut",
],
)

cc_test(
name = "table_from_frequencies_test",
timeout = "short",
srcs = ["table_from_frequencies_test.cpp"],
deps = [
"//huffman",
"@boost_ut",
Expand Down Expand Up @@ -61,9 +71,9 @@ cc_test(
)

cc_test(
name = "table_find_code_test",
name = "table_from_symbol_bitsize_test",
timeout = "short",
srcs = ["table_find_code_test.cpp"],
srcs = ["table_from_symbol_bitsize_test.cpp"],
deps = [
"//huffman",
"@boost_ut",
Expand Down
Loading
Loading