From 6fffff8c1e9572f1c3e2a907513bff02fde2a713 Mon Sep 17 00:00:00 2001 From: Oliver Lee Date: Tue, 26 Sep 2023 18:37:36 -0700 Subject: [PATCH] construct huffman table directly on reversed elements Change-Id: Ic42b6b1c03f10077d19d49a9e6544214834ba42f --- huffman/src/detail/table_node.hpp | 37 ++++++++++++++++++++----------- huffman/src/table.hpp | 35 +++++++++++++---------------- 2 files changed, 39 insertions(+), 33 deletions(-) diff --git a/huffman/src/detail/table_node.hpp b/huffman/src/detail/table_node.hpp index 5d25295..1185781 100644 --- a/huffman/src/detail/table_node.hpp +++ b/huffman/src/detail/table_node.hpp @@ -114,30 +114,41 @@ class table_node : public encoding return init_.node_size; } /// "Joins" two `table_node`s - /// @param lhs left table_node /// @param rhs right table_node - /// @pre `&lhs + lhs.node_size() == &rhs` + /// @param lhs left table_node + /// @pre `&rhs - rhs.node_size() == &lhs` + /// + /// Logically "join" `rhs` with the next adjacent node `lhs` "creating" an + /// internal node. This adds the frequency of `lhs` to `rhs`, left pads all + /// the codes of the internal nodes of `rhs` with 0s and left pads all the + /// codes of the internal nodes of `lhs` with 1s. /// - /// Logically "join" `lhs` with the next adjacent node `rhs` "creating" an - /// internal node. This adds the frequency of `rhs` to `lhs`, left pads all - /// the codes of the internal nodes of `lhs` with 0s and left pads all the - /// code of the internal nodes of `rhs` with 1s. + /// @attention This is intended to be used with a reversed view of table + /// elements. 
/// - friend constexpr auto join(table_node& lhs, table_node& rhs) -> void + friend constexpr auto join_reversed(table_node& rhs, table_node& lhs) -> void { - assert( - &lhs + lhs.node_size() == &rhs and "`lhs` and `rhs` are not adjacent"); + assert(&rhs == &lhs + rhs.node_size() and // + "`rhs` and `lhs` are not adjacent"); const auto left_pad_with = [](auto b) { return [b](table_node& n) { b >> static_cast(n); }; }; - std::for_each(&lhs, &rhs, left_pad_with(bit{0})); - std::for_each(&rhs, &rhs + rhs.node_size(), left_pad_with(bit{1})); + // reverse iterator, so lhs is one past what we want to modify, and we do want to modify rhs. + // [mid, last) contains the nodes to pad with 0 + const auto last = &rhs + 1; + const auto mid = &lhs + 1; + + // [first, mid) contains the nodes to pad with 1 + const auto first = mid - static_cast(lhs.node_size()); + + std::for_each(mid, last, left_pad_with(bit{0})); + std::for_each(first, mid, left_pad_with(bit{1})); // NOLINTBEGIN(cppcoreguidelines-pro-type-union-access) - lhs.init_.frequency += rhs.frequency(); - lhs.init_.node_size += rhs.node_size(); + rhs.init_.frequency += lhs.frequency(); + rhs.init_.node_size += lhs.node_size(); // NOLINTEND(cppcoreguidelines-pro-type-union-access) } diff --git a/huffman/src/table.hpp b/huffman/src/table.hpp index dc1f8fa..25fbf11 100644 --- a/huffman/src/table.hpp +++ b/huffman/src/table.hpp @@ -67,18 +67,20 @@ class table constexpr auto encode_symbols() -> void { + auto reversed = std::views::reverse(table_); + // precondition, audit - assert(std::ranges::is_sorted(table_)); + assert(std::ranges::is_sorted(reversed)); - const auto total_size = table_.size(); - auto first = table_.begin(); - const auto last = table_.end(); + const auto total_size = reversed.size(); + auto first = reversed.begin(); + const auto last = reversed.end(); while (first->node_size() != total_size) { - join(first[0], first[to_index(first->node_size())]); + join_reversed(first[0], 
first[to_index(first->node_size())]); - const auto has_higher_freq = [f = first->frequency()](const auto& n) { - return n.frequency() > f; + const auto has_higher_freq = [&first](const auto& n) { + return n.frequency() > first->frequency(); }; auto lower = first + to_index(first->node_size()); @@ -103,33 +105,26 @@ class table return; } - std::ranges::sort(table_); + auto reversed = std::views::reverse(table_); + + std::ranges::sort(reversed); // precondition assert( std::ranges::unique( - table_, {}, [](const auto& elem) { return elem.symbol; }) + reversed, {}, [](const auto& elem) { return elem.symbol; }) .empty() and "a `table` cannot contain duplicate symbols"); const auto frequencies = std::views::transform( - table_, [](const auto& elem) { return elem.frequency(); }); + reversed, [](const auto& elem) { return elem.frequency(); }); [[maybe_unused]] const auto total_freq = std::accumulate(std::cbegin(frequencies), std::cend(frequencies), 0UZ); encode_symbols(); // postcondition - assert(total_freq == table_.front().frequency()); - - // Implicit construction of the Huffman tree results in the least frequent - // symbols at the beginning (largest bitsize) and most frequent at the end - // (smallest bitsize). See details in `table_node.hpp` for how this is - // represented. - // - // Reversing the elements allows code search to start with the symbols with - // the smallest bitsize. - std::ranges::reverse(table_); + assert(total_freq == reversed.front().frequency()); } constexpr auto set_skip_fields() -> void