From d7a2481437500728dbe67ef65d125700ab0b98e9 Mon Sep 17 00:00:00 2001 From: Oliver Lee Date: Tue, 26 Sep 2023 18:42:14 -0700 Subject: [PATCH] add huffman table constructor from symbol-bitsize range Add a constructor to huffman::table that takes a range of symbol-bitsize tuples. This commit also moves public tag types into a utility header. Change-Id: Ia125173ce91e2e189c23de0956d9eaa7aa51762c --- huffman/BUILD.bazel | 1 + huffman/src/detail/table_storage.hpp | 17 +-- huffman/src/table.hpp | 135 +++++++++++++----- huffman/src/utility.hpp | 30 ++++ huffman/test/BUILD.bazel | 22 ++- huffman/test/table_from_contents_test.cpp | 30 ---- .../test/table_from_symbol_bitsize_test.cpp | 69 +++++++++ 7 files changed, 214 insertions(+), 90 deletions(-) create mode 100644 huffman/src/utility.hpp create mode 100644 huffman/test/table_from_symbol_bitsize_test.cpp diff --git a/huffman/BUILD.bazel b/huffman/BUILD.bazel index 0533d29..2f9a44f 100644 --- a/huffman/BUILD.bazel +++ b/huffman/BUILD.bazel @@ -14,6 +14,7 @@ cc_library( "src/detail/table_storage.hpp", "src/encoding.hpp", "src/table.hpp", + "src/utility.hpp", ], hdrs = ["huffman.hpp"], visibility = ["//:__subpackages__"], diff --git a/huffman/src/detail/table_storage.hpp b/huffman/src/detail/table_storage.hpp index 02a26ec..f3e2bcb 100644 --- a/huffman/src/detail/table_storage.hpp +++ b/huffman/src/detail/table_storage.hpp @@ -3,6 +3,7 @@ #include "huffman/src/code.hpp" #include "huffman/src/detail/static_vector.hpp" #include "huffman/src/encoding.hpp" +#include "huffman/src/utility.hpp" #include #include @@ -15,15 +16,6 @@ namespace starflate::huffman { -/// Disambiguation tag to specify a table is constructed with a code-symbol -/// mapping -/// -struct table_contents_tag -{ - explicit table_contents_tag() = default; -}; -inline constexpr auto table_contents = table_contents_tag{}; - namespace detail { struct frequency_tag @@ -55,7 +47,6 @@ class table_storage : table_storage_base_t template constexpr table_storage( 
frequency_tag, const R& frequencies, std::optional eot) - : base_type{} { base_type::reserve( std::ranges::size(frequencies) + std::size_t{eot.has_value()}); @@ -74,7 +65,6 @@ class table_storage : table_storage_base_t template constexpr table_storage( data_tag, const R& data, std::optional eot) - : base_type{} { if (eot) { base_type::emplace_back(*eot, 1UZ); @@ -97,7 +87,7 @@ class table_storage : table_storage_base_t } template - constexpr table_storage(table_contents_tag, const R& map) : base_type{} + constexpr table_storage(table_contents_tag, const R& map) { const auto as_code = [](auto& node) -> auto& { return static_cast(node); @@ -116,9 +106,6 @@ class table_storage : table_storage_base_t as_symbol(*it) = s; ++it; } - - assert(std::ranges::unique(*this, {}, as_code).empty()); - assert(std::ranges::unique(*this, {}, as_symbol).empty()); } using base_type::begin; diff --git a/huffman/src/table.hpp b/huffman/src/table.hpp index 7238215..13c767f 100644 --- a/huffman/src/table.hpp +++ b/huffman/src/table.hpp @@ -3,6 +3,7 @@ #include "huffman/src/detail/element_base_iterator.hpp" #include "huffman/src/detail/table_node.hpp" #include "huffman/src/detail/table_storage.hpp" +#include "huffman/src/utility.hpp" #include #include @@ -22,8 +23,47 @@ namespace starflate::huffman { -template -using c_array = T[N]; +namespace detail { + +/// Convert an unsigned integer to signed +/// +template +static constexpr auto to_signed(U uint) +{ + using C = std::common_type_t, U>; + + assert(static_cast(uint) < static_cast(std::numeric_limits::max())); + return static_cast(uint); +} + +/// Finds the next internal node that satisfies a predicate +/// +template P> +constexpr static auto find_node_if(I first, I last, P pred) +{ + using S = std::iter_difference_t; + + for (; first != last; first += to_signed(first->node_size())) { + if (pred(*first)) { + break; + } + } + + return first; +} + +/// Transforms a symbol-bitsize range to a code-symbol range +/// +template +constexpr auto 
to_code_symbol(const R& rng) +{ + return std::views::transform(rng, [](const auto& elem) { + const auto& [symbol, bitsize] = elem; + return std::tuple{code{bitsize, {}}, symbol}; + }); +} + +} // namespace detail /// Huffman code table /// @tparam Symbol symbol type @@ -44,31 +84,10 @@ class table detail::table_storage table_; - template - static constexpr auto to_index(U uint) + constexpr auto encode_symbols() -> void { using S = std::ranges::range_difference_t; - using C = std::common_type_t, U>; - - assert( - static_cast(uint) < static_cast(std::numeric_limits::max())); - return static_cast(uint); - } - template P> - constexpr static auto find_node_if(I first, I last, P pred) - { - for (; first != last; first += to_index(first->node_size())) { - if (pred(*first)) { - break; - } - } - - return first; - } - - constexpr auto encode_symbols() -> void - { auto reversed = std::views::reverse(table_); // precondition, audit @@ -79,13 +98,13 @@ class table const auto last = reversed.end(); while (first->node_size() != total_size) { - join_reversed(first[0], first[to_index(first->node_size())]); + join_reversed(first[0], first[detail::to_signed(first->node_size())]); const auto has_higher_freq = [&first](const auto& n) { return n.frequency() > first->frequency(); }; - auto lower = first + to_index(first->node_size()); + auto lower = first + detail::to_signed(first->node_size()); auto upper = find_node_if(lower, last, has_higher_freq); // re-sort after creating a new internal node @@ -149,13 +168,13 @@ class table } public: - /// Code point type + /// Symbol type /// - using encoding_type = encoding; + using symbol_type = Symbol; - /// Symbol type + /// Code point type /// - using symbol_type = typename encoding_type::symbol_type; + using encoding_type = encoding; /// Const iterator type /// @@ -176,13 +195,6 @@ class table /// /// @{ - template - requires std::convertible_to< - std::ranges::range_value_t, - std::tuple> - constexpr explicit table(const R& frequencies) : 
table{frequencies, {}} - {} - template requires std::convertible_to< std::ranges::range_value_t, @@ -194,6 +206,13 @@ class table set_skip_fields(); } + template + requires std::convertible_to< + std::ranges::range_value_t, + std::tuple> + constexpr explicit table(const R& frequencies) : table{frequencies, {}} + {} + template constexpr explicit table( const c_array, N>& frequencies) @@ -251,7 +270,7 @@ class table std::tuple_element_t<1, std::ranges::range_value_t>, symbol_type>) constexpr table(table_contents_tag, const R& map) - : table_{table_contents_tag{}, map} + : table_{table_contents, map} { set_skip_fields(); } @@ -259,13 +278,43 @@ class table template constexpr table( table_contents_tag, const c_array, N>& map) - : table_{table_contents_tag{}, map} + : table_{table_contents, map} { set_skip_fields(); } /// @} + /// Constructs a `table` from a symbol-bitsize mapping + /// @tparam R sized-range of symbol-bitsize tuple-likes + /// @param rng range of symbol-bitsize tuple-likes + /// @pre all symbols are unique + /// @pre the number of symbols with the same bitsize does not exceed the + /// available number of prefix free codes with that bitsize + /// + /// @{ + + template + requires std::convertible_to< + std::ranges::range_reference_t, + std::tuple> + constexpr table(symbol_bitsize_tag, const R& map) + : table{table_contents, detail::to_code_symbol(map)} + { + canonicalize(); + } + + template + constexpr table( + symbol_bitsize_tag, + const c_array, N>& map) + : table{table_contents, detail::to_code_symbol(map)} + { + canonicalize(); + } + + /// @} + /// Returns an iterator to the first `encoding` /// /// @note elements are ordered by code bitsize. 
If multiple elements have the @@ -461,4 +510,12 @@ template table(table_contents_tag, const R&) -> table, detail::tuple_size_v()>; +template +table(symbol_bitsize_tag, const c_array, N>&) -> table; + +template + requires (detail::tuple_size_v>() == 2) +table(symbol_bitsize_tag, const R&) + -> table, detail::tuple_size_v()>; + } // namespace starflate::huffman diff --git a/huffman/src/utility.hpp b/huffman/src/utility.hpp new file mode 100644 index 0000000..7ca4290 --- /dev/null +++ b/huffman/src/utility.hpp @@ -0,0 +1,30 @@ +#pragma once + +#include + +namespace starflate::huffman { + +/// Convenience alias for a C-style array +/// +template +using c_array = T[N]; + +/// Disambiguation tag to specify a table is constructed with a code-symbol +/// mapping +/// +struct table_contents_tag +{ + explicit table_contents_tag() = default; +}; +inline constexpr auto table_contents = table_contents_tag{}; + +/// Disambiguation tag to specify a table is constructed with a symbol-bitsize +/// mapping +/// +struct symbol_bitsize_tag +{ + explicit symbol_bitsize_tag() = default; +}; +inline constexpr auto symbol_bitsize = symbol_bitsize_tag{}; + +} // namespace starflate::huffman diff --git a/huffman/test/BUILD.bazel b/huffman/test/BUILD.bazel index bc6d00b..d99bfea 100644 --- a/huffman/test/BUILD.bazel +++ b/huffman/test/BUILD.bazel @@ -21,9 +21,9 @@ cc_test( ) cc_test( - name = "table_from_frequencies_test", + name = "table_canonicalize_test", timeout = "short", - srcs = ["table_from_frequencies_test.cpp"], + srcs = ["table_canonicalize_test.cpp"], deps = [ "//huffman", "@boost_ut", @@ -31,9 +31,19 @@ cc_test( ) cc_test( - name = "table_canonicalize_test", + name = "table_find_code_test", timeout = "short", - srcs = ["table_canonicalize_test.cpp"], + srcs = ["table_find_code_test.cpp"], + deps = [ + "//huffman", + "@boost_ut", + ], +) + +cc_test( + name = "table_from_frequencies_test", + timeout = "short", + srcs = ["table_from_frequencies_test.cpp"], deps = [ "//huffman", 
"@boost_ut", @@ -61,9 +71,9 @@ cc_test( ) cc_test( - name = "table_find_code_test", + name = "table_from_symbol_bitsize_test", timeout = "short", - srcs = ["table_find_code_test.cpp"], + srcs = ["table_from_symbol_bitsize_test.cpp"], deps = [ "//huffman", "@boost_ut", diff --git a/huffman/test/table_from_contents_test.cpp b/huffman/test/table_from_contents_test.cpp index bf5a769..792186f 100644 --- a/huffman/test/table_from_contents_test.cpp +++ b/huffman/test/table_from_contents_test.cpp @@ -125,34 +125,4 @@ auto main() -> int expect(std::ranges::equal(t1, t2)); }; - - test("code table aborts on duplicate codes") = [] { - expect(aborts([] { // clang-format off - huffman::table{ - huffman::table_contents, - {std::pair{1_c, 'e'}, - {01_c, 'i'}, - {001_c, 'n'}, - {0001_c, 'g'}, - {0001_c, 'q'}, - {00001_c, 'x'}, - {00000_c, '\4'}}}; - // clang-format on - })); - }; - - test("code table aborts on duplicate symbols") = [] { - expect(aborts([] { // clang-format off - huffman::table{ - huffman::table_contents, - {std::pair{1_c, 'e'}, - {01_c, 'i'}, - {001_c, 'n'}, - {0000_c, 'q'}, - {0001_c, 'q'}, - {00001_c, 'x'}, - {00000_c, '\4'}}}; - // clang-format on - })); - }; } diff --git a/huffman/test/table_from_symbol_bitsize_test.cpp b/huffman/test/table_from_symbol_bitsize_test.cpp new file mode 100644 index 0000000..2476cd6 --- /dev/null +++ b/huffman/test/table_from_symbol_bitsize_test.cpp @@ -0,0 +1,69 @@ +#include "huffman/huffman.hpp" + +#include + +#include +#include +#include +#include + +auto main() -> int +{ + using ::boost::ut::expect; + using ::boost::ut::test; + + namespace huffman = ::starflate::huffman; + using namespace huffman::literals; + + test("table with DEFLATE canonical code, example 1") = [] { + static constexpr auto actual = // clang-format off + huffman::table{ + huffman::symbol_bitsize, + {std::pair{'A', 2}, + {'B', 1}, + {'C', 3}, + {'D', 3}}}; + // clang-format on + + static constexpr auto expected = // clang-format off + huffman::table{ + 
huffman::table_contents, + {std::pair{0_c, 'B'}, + {10_c, 'A'}, + {110_c, 'C'}, + {111_c, 'D'}}}; + // clang-format on + + expect(std::ranges::equal(actual, expected)); + }; + + test("table with DEFLATE canonical code, example 2") = [] { + const auto actual = // clang-format off + huffman::table{ + huffman::symbol_bitsize, + std::vector>{{'A', 3}, + {'B', 3}, + {'C', 3}, + {'D', 3}, + {'E', 3}, + {'F', 2}, + {'G', 4}, + {'H', 4}}}; + // clang-format on + + static constexpr auto expected = // clang-format off + huffman::table{ + huffman::table_contents, + {std::pair{00_c, 'F'}, + {010_c, 'A'}, + {011_c, 'B'}, + {100_c, 'C'}, + {101_c, 'D'}, + {110_c, 'E'}, + {1110_c, 'G'}, + {1111_c, 'H'}}}; + // clang-format on + + expect(std::ranges::equal(actual, expected)); + }; +}