Skip to content

Commit

Permalink
define huffman::table::canonicalize()
Browse files Browse the repository at this point in the history
huffman::table::canonicalize() updates the existing codes in a table to
canonical form for DEFLATE:
 * All codes of a given bit length have lexicographically consecutive
   values, in the same order as the symbols they represent;
 * Shorter codes lexicographically precede longer codes.

Change-Id: Idc3dceefe1b5d17a54f1dab29155a499c7e1d138
  • Loading branch information
oliverlee committed Sep 23, 2023
1 parent 041bcb8 commit 2ea347f
Show file tree
Hide file tree
Showing 5 changed files with 165 additions and 0 deletions.
1 change: 1 addition & 0 deletions huffman/src/detail/static_vector.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ class static_vector : std::array<T, Capacity>
}

/// Returns the stored element count `size_`.
constexpr auto size() const noexcept -> size_type
{
  return size_;
}
constexpr auto empty() const noexcept -> bool { return size_ == size_type{}; }

/// Returns the iterator `size()` positions past `begin()`.
constexpr auto end() noexcept -> iterator
{
  const auto count = size();
  return begin() + count;
}
constexpr auto end() const noexcept -> const_iterator
Expand Down
1 change: 1 addition & 0 deletions huffman/src/detail/table_storage.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,7 @@ class table_storage : table_storage_base_t<IntrusiveNode, Extent>
using base_type::cbegin;
using base_type::cend;
using base_type::data;
using base_type::empty;
using base_type::end;
using base_type::front;
using base_type::size;
Expand Down
63 changes: 63 additions & 0 deletions huffman/src/table.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -264,6 +264,69 @@ class table
}
return os;
}

/// Update table code values to DEFLATE canonical form
///
/// The Huffman codes used for each alphabet in the "deflate" format have two
/// additional rules:
/// * All codes of a given bit length have lexicographically consecutive
/// values, in the same order as the symbols they represent;
/// * Shorter codes lexicographically precede longer codes.
///
/// @see section 3.2.2 https://datatracker.ietf.org/doc/html/rfc1951
///
/// @{

constexpr auto canonicalize() & -> table&
{
  // Nothing to renumber in an empty table.
  if (table_.empty()) {
    return *this;
  }

  // Storage holds the elements back to front; working through a reversed
  // view keeps that layout intact while we process them front to back.
  auto ordered = std::views::reverse(table_);

  // Sort key: code length first, then symbol. The symbol is wrapped in a
  // reference so that building the key does not copy it.
  const auto sort_key = [](const auto& entry) {
    return std::pair{entry.bitsize(), std::ref(entry.symbol)};
  };
  const auto precedes = [sort_key](const auto& lhs, const auto& rhs) {
    return sort_key(lhs) < sort_key(rhs);
  };

  std::ranges::sort(ordered, precedes);

  // Always one past the most recently assigned code value (it is bumped
  // after every assignment below). Shifting it left by the growth in bit
  // length yields the first code of the next, longer length -- step 2 of
  // the RFC algorithm.
  auto level_start = 0Z;

  // The code handed to the previous element; starts as a default `code`.
  // Step 3 of the RFC algorithm derives each code from it.
  auto assigned = code{};

  // clang-format off
  for (auto& entry : ordered) {
    // the sort above guarantees bit lengths never decrease
    assert(assigned.bitsize() <= entry.bitsize());

    assigned = {
      entry.bitsize(),
      assigned.bitsize() == entry.bitsize()
        ? assigned.value() + 1U                                         // same length: next consecutive value
        : level_start <<= (entry.bitsize() - assigned.bitsize())        // longer length: first value of that length
    };

    // overwrite only the `code` part of the element
    static_cast<code&>(entry) = assigned;

    ++level_start;
  }
  // clang-format on

  return *this;
}

constexpr auto canonicalize() && -> table&&
{
  // `*this` is an lvalue inside the member function, so this call resolves
  // to the `&`-qualified overload; its result is then handed back as an
  // rvalue reference.
  table& self = canonicalize();
  return std::move(self);
}

/// @}
};

namespace detail {
Expand Down
10 changes: 10 additions & 0 deletions huffman/test/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,16 @@ cc_test(
],
)

# Test target built from table_canonicalize_test.cpp, which exercises
# huffman::table::canonicalize(). Depends on the //huffman library and the
# Boost.UT test framework, and is declared with the "short" timeout class.
cc_test(
name = "table_canonicalize_test",
timeout = "short",
srcs = ["table_canonicalize_test.cpp"],
deps = [
"//huffman",
"@boost_ut",
],
)

cc_test(
name = "table_from_data_test",
timeout = "short",
Expand Down
90 changes: 90 additions & 0 deletions huffman/test/table_canonicalize_test.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
#include "huffman/huffman.hpp"

#include <boost/ut.hpp>

#include <algorithm>
#include <utility>

auto main() -> int
{
using ::boost::ut::expect;
using ::boost::ut::test;

namespace huffman = ::starflate::huffman;
using namespace huffman::literals;

test("table with DEFLATE canonical code, example 1") = [] {
static constexpr auto t1 = // clang-format off
huffman::table{
huffman::table_contents,
{std::pair{010_c, 'D'},
{011_c, 'C'},
{00_c, 'A'},
{1_c, 'B'}}}.canonicalize();
// clang-format on

static constexpr auto t2 = // clang-format off
huffman::table{
huffman::table_contents,
{std::pair{111_c, 'D'},
{110_c, 'C'},
{10_c, 'A'},
{0_c, 'B'}}};
// clang-format on

expect(std::ranges::equal(t1, t2));
};

test("table with DEFLATE canonical code, example 2") = [] {
// NOTE: t1 is an *invalid* table (as initially specified) as it does not
// contain prefix free codes.
static constexpr auto t1 = // clang-format off
huffman::table{
huffman::table_contents,
{std::pair{1111_c, 'H'},
{0111_c, 'G'},
{100_c, 'E'},
{011_c, 'D'},
{010_c, 'C'},
{001_c, 'B'},
{000_c, 'A'},
{11_c, 'F'}}}.canonicalize();
// clang-format on

static constexpr auto t2 = // clang-format off
huffman::table{
huffman::table_contents,
{std::pair{1111_c, 'H'},
{1110_c, 'G'},
{110_c, 'E'},
{101_c, 'D'},
{100_c, 'C'},
{011_c, 'B'},
{010_c, 'A'},
{00_c, 'F'}}};
// clang-format on

expect(std::ranges::equal(t1, t2));
};

test("canonicalization is idempotent") = [] {
static constexpr auto t1 = // clang-format off
huffman::table{
huffman::table_contents,
{std::pair{1111_c, 'H'},
{1110_c, 'G'},
{110_c, 'E'},
{101_c, 'D'},
{100_c, 'C'},
{011_c, 'B'},
{010_c, 'A'},
{00_c, 'F'}}};
// clang-format on

auto t2 = t1;
t2.canonicalize();

expect(std::ranges::equal(t1, t2));
expect(std::ranges::equal(t1, t2.canonicalize()));
};
}

0 comments on commit 2ea347f

Please sign in to comment.