Skip to content

Commit

Permalink
decompress block type 00 - no compression
Browse files Browse the repository at this point in the history
Change-Id: I5ceb11f5b6ba0ef63e250757747dab79c7958653
  • Loading branch information
garymm committed Oct 30, 2023
1 parent 217076e commit 6790495
Show file tree
Hide file tree
Showing 9 changed files with 258 additions and 13 deletions.
36 changes: 36 additions & 0 deletions .vscode/launch.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
{
// Use IntelliSense to learn about possible attributes.
// Hover to view descriptions of existing attributes.
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"configurations": [
{
"name": "lldb bit_span_test",
"type": "lldb",
"request": "launch",
"program": "${workspaceFolder}/bazel-bin/huffman/test/bit_span_test",
"cwd": "${workspaceFolder}",
// necessary for debugging binaries built by bazel
// see:
// https://github.com/vadimcn/codelldb/wiki/Breakpoints-are-not-getting-hit#source-file-path-mismatch
"sourceMap": {
"/proc/self/cwd": "${workspaceFolder}"
},
"preLaunchTask": "build-debug",
},
{
"name": "lldb decompress_test",
"type": "lldb",
"request": "launch",
"program": "${workspaceFolder}/bazel-bin/src/test/decompress_test",
"cwd": "${workspaceFolder}",
// necessary for debugging binaries built by bazel
// see:
// https://github.com/vadimcn/codelldb/wiki/Breakpoints-are-not-getting-hit#source-file-path-mismatch
"sourceMap": {
"/proc/self/cwd": "${workspaceFolder}"
},
"preLaunchTask": "build-debug",
}
]
}
18 changes: 18 additions & 0 deletions .vscode/tasks.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
{
"tasks": [
{
"args": [
"build",
"-c",
"dbg",
"//..."
],
"command": "bazel",
"group": "build",
"label": "build-debug",
"problemMatcher": [],
"type": "shell"
}
],
"version": "2.0.0"
}
9 changes: 9 additions & 0 deletions huffman/src/bit_span.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -164,5 +164,14 @@ class bit_span : public std::ranges::view_interface<bit_span>
consume(CHAR_BIT - bit_offset_);
}
}

/// Returns a pointer to the underlying byte data.
///
/// @pre `*this` is aligned to a byte boundary, i.e. `bit_offset_ == 0`.
///      The precondition is checked with `assert`, so it is only enforced in
///      builds where assertions are enabled.
/// @return the stored `data_` pointer, unchanged.
[[nodiscard]]
constexpr auto byte_data() const -> const std::byte*
{
// A byte pointer cannot describe a position part-way through a byte, so
// callers must align the span before asking for it.
assert(bit_offset_ == 0 and "bit_span must be byte aligned to access data");
return data_;
}
};
} // namespace starflate::huffman
16 changes: 8 additions & 8 deletions huffman/test/decode_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,12 @@
#include <stdexcept>
#include <utility>

/// Reverses the order of the low `CHAR_BIT` bits of `b`.
///
/// Bit 0 of `b` becomes the most significant bit of the result, bit 1 the
/// next one, and so on. Bits of `b` above the low `CHAR_BIT` are ignored.
///
/// @param b value whose low `CHAR_BIT` bits are mirrored
/// @return the mirrored bits as a single byte
constexpr auto reverse_bits(int b) -> std::byte
{
  std::byte result{};
  for (auto i = 0; i < CHAR_BIT; ++i) {
    // Make room for the next bit, then append the current lowest bit of `b`.
    result <<= 1;
    result |= static_cast<std::byte>(b & 1);
    b >>= 1;
  }
  return result;
}
Expand All @@ -30,12 +30,12 @@ auto main() -> int
// encoded data from soxofaan/dahuffman readme.rst.
// We reverse the bits in each byte to match the encoding used in DEFLATE.
constexpr std::array encoded_bytes = {
reverse_bits(std::byte{134}),
reverse_bits(std::byte{124}),
reverse_bits(std::byte{37}),
reverse_bits(std::byte{19}),
reverse_bits(std::byte{105}),
reverse_bits(std::byte{64})};
reverse_bits(134),
reverse_bits(124),
reverse_bits(37),
reverse_bits(19),
reverse_bits(105),
reverse_bits(64)};

constexpr char eot = {'\4'};
static constexpr auto code_table = // clang-format off
Expand Down
2 changes: 2 additions & 0 deletions src/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -4,5 +4,7 @@ package(default_visibility = ["//src:__subpackages__"])

cc_library(
name = "decompress",
srcs = ["decompress.cpp"],
hdrs = ["decompress.hpp"],
deps = ["//huffman"],
)
32 changes: 32 additions & 0 deletions src/decompress.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
#include "decompress.hpp"

#include "huffman/huffman.hpp"

#include <expected>

namespace starflate::detail {

/// Tells whether `type` is one of the named `BlockType` enumerators.
///
/// A `BlockType` produced by casting raw header bits can hold a value that has
/// no enumerator; such a value yields `false`.
auto valid(BlockType type) -> bool
{
  switch (type) {
    case BlockType::NoCompression:
    case BlockType::FixedHuffman:
    case BlockType::DynamicHuffman:
      return true;
  }
  return false;
}

/// Reads the three-bit block header at the front of `compressed_bits`.
///
/// Bit 0 is the "final block" flag; bits 1 and 2 form the block type, with
/// bit 1 as the low-order bit.
///
/// @param compressed_bits bits to read from; advanced by three bits on
///        success and left untouched on failure
/// @return the decoded header, or `DecompressError::InvalidBlockHeader` when
///         fewer than three bits are available or the block type is not a
///         known one
auto read_header(huffman::bit_span& compressed_bits)
    -> std::expected<BlockHeader, DecompressError>
{
  if (std::ranges::size(compressed_bits) < 3) {
    return std::unexpected{DecompressError::InvalidBlockHeader};
  }

  const bool is_final = static_cast<bool>(compressed_bits[0]);
  const bool type_low_bit = static_cast<bool>(compressed_bits[1]);
  const bool type_high_bit = static_cast<bool>(compressed_bits[2]);

  const auto type = static_cast<BlockType>(
      (type_high_bit ? 2 : 0) | (type_low_bit ? 1 : 0));

  if (valid(type)) {
    compressed_bits.consume(3);
    return BlockHeader{is_final, type};
  }
  return std::unexpected{DecompressError::InvalidBlockHeader};
}
} // namespace starflate::detail
75 changes: 72 additions & 3 deletions src/decompress.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
#pragma once

#include "huffman/huffman.hpp"

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <expected>
Expand All @@ -10,19 +13,85 @@
namespace starflate {

/// Error codes reported by the decompression routines.
enum class DecompressError : std::uint8_t
{
  /// Generic failure; returned for block types that are not implemented yet.
  Error,
  /// The block header is truncated or names an unknown block type.
  InvalidBlockHeader,
  /// In a "no compression" block, LEN is not the one's complement of NLEN.
  NoCompressionLenMismatch,
};

namespace detail {

/// Kind of a block, as encoded by the two type bits of its header.
///
/// The enumerator values equal the two-bit number read from the stream; the
/// remaining value `0b11` has no enumerator.
enum class BlockType : std::uint8_t
{
  NoCompression = 0b00,
  FixedHuffman = 0b01,
  DynamicHuffman = 0b10,
};

/// Decoded form of the three header bits that start a block.
struct BlockHeader
{
// True when the first header bit is set; `decompress` stops after such a
// block.
bool final;
// Block kind taken from the second and third header bits.
BlockType type;
};

/// Reads the three-bit block header at the front of `compressed_bits`.
///
/// @param compressed_bits bits to read from; three bits are consumed when a
///        header is returned, none when an error is returned
/// @return the decoded header, or `DecompressError::InvalidBlockHeader` when
///         fewer than three bits remain or the block type is not a known one
auto read_header(huffman::bit_span& compressed_bits)
-> std::expected<BlockHeader, DecompressError>;
} // namespace detail

using namespace huffman::literals;

/// Decompresses a DEFLATE stream.
///
/// Inspired by https://docs.python.org/3/library/zlib.html#zlib.decompress
///
/// Only "no compression" blocks are handled so far; any other block type
/// yields `DecompressError::Error`.
///
/// @tparam N extent of the input span
/// @tparam ByteAllocator allocator type of the returned vector
/// @param compressed the compressed bytes
/// @param alloc allocator instance used to construct the returned vector
/// @return the decompressed bytes, or
///         - `DecompressError::InvalidBlockHeader` if a block header is
///           truncated or names an unknown block type,
///         - `DecompressError::NoCompressionLenMismatch` if LEN and NLEN of a
///           "no compression" block are not one's complements of each other,
///         - `DecompressError::Error` if the block type is not implemented or
///           the input ends before the data announced by the block.
template <std::size_t N, class ByteAllocator = std::allocator<std::byte>>
auto decompress(
    std::span<const std::byte, N> compressed, ByteAllocator alloc = {})
    -> std::expected<std::vector<std::byte, ByteAllocator>, DecompressError>
{
  // A "no compression" block stores LEN and NLEN, 16 bits each.
  constexpr auto len_fields_bits = std::size_t{2 * 16};

  auto decompressed = std::vector<std::byte, ByteAllocator>(alloc);

  huffman::bit_span compressed_bits{compressed};
  while (true) {
    const auto header = detail::read_header(compressed_bits);
    if (not header) {
      return std::unexpected{header.error()};
    }

    if (header->type != detail::BlockType::NoCompression) {
      // TODO: implement
      return std::unexpected{DecompressError::Error};
    }

    // Any bits of input up to the next byte boundary are ignored.
    compressed_bits.consume_to_byte_boundary();

    // The input is untrusted: verify that LEN and NLEN are present before
    // popping them instead of relying on assertions further down.
    if (std::ranges::size(compressed_bits) < len_fields_bits) {
      return std::unexpected{DecompressError::Error};
    }
    const std::uint16_t len = compressed_bits.pop_16();
    const std::uint16_t nlen = compressed_bits.pop_16();
    if (len != static_cast<std::uint16_t>(~nlen)) {
      return std::unexpected{DecompressError::NoCompressionLenMismatch};
    }

    // A truncated payload must be rejected at run time; an assert would
    // turn into an out-of-bounds read in builds without assertions.
    if (std::ranges::size(compressed_bits) < std::size_t{len} * CHAR_BIT) {
      return std::unexpected{DecompressError::Error};
    }

    // Append the stored bytes with a single range insert so the vector can
    // grow once instead of once per byte.
    const std::byte* const first = compressed_bits.byte_data();
    decompressed.insert(decompressed.end(), first, first + len);
    compressed_bits.consume(CHAR_BIT * len);

    if (header->final) {
      break;
    }
  }
  return decompressed;
}

/// Convenience overload of `decompress` for any contiguous range of bytes.
///
/// The range is viewed through a `std::span` and forwarded to the span-based
/// overload. `std::ranges::data` / `std::ranges::size` are used rather than
/// member functions so that every type modelling `contiguous_range`
/// (including built-in arrays) is accepted.
///
/// @tparam R contiguous range whose value type is `std::byte`
/// @tparam ByteAllocator allocator type of the returned vector
/// @param compressed the compressed bytes
/// @param alloc allocator instance used to construct the returned vector
/// @return whatever the span-based overload returns for the same bytes
template <
    std::ranges::contiguous_range R,
    class ByteAllocator = std::allocator<std::byte>>
  requires std::same_as<std::ranges::range_value_t<R>, std::byte>
auto decompress(const R& compressed, ByteAllocator alloc = {})
{
  return decompress(
      std::span{std::ranges::data(compressed), std::ranges::size(compressed)},
      alloc);
}

} // namespace starflate
1 change: 1 addition & 0 deletions src/test/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -7,5 +7,6 @@ cc_test(
deps = [
"//:boost_ut",
"//src:decompress",
"@boost_ut",
],
)
82 changes: 80 additions & 2 deletions src/test/decompress_test.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,84 @@
#include "huffman/src/utility.hpp"
#include "src/decompress.hpp"

auto main() -> int
#include <boost/ut.hpp>

#include <vector>

/// Builds a `std::vector<std::byte>` from the given values.
///
/// @param values values converted to `std::byte` one by one, in argument order
/// @return a vector holding one byte per argument (empty for no arguments)
template <class... Ts>
constexpr auto byte_vector(Ts... values)
{
  return std::vector<std::byte>{static_cast<std::byte>(values)...};
}

auto main() -> int
{
using ::boost::ut::eq;
using ::boost::ut::expect;
using ::boost::ut::fatal;
using ::boost::ut::test;
using namespace starflate;

test("read_header") = [] -> void {
huffman::bit_span empty{nullptr, 0, 0};
expect(detail::read_header(empty).error() ==
DecompressError::InvalidBlockHeader);

constexpr auto bad_block_type = huffman::byte_array(0b111);
huffman::bit_span bad_block_type_span{bad_block_type};
expect(detail::read_header(bad_block_type_span).error() ==
DecompressError::InvalidBlockHeader);

constexpr auto fixed = huffman::byte_array(0b010);
huffman::bit_span fixed_span{fixed};
auto header = detail::read_header(fixed_span);
expect(header.has_value())
<< "got error: " << static_cast<int>(header.error());
expect(not header->final);
expect(header->type == detail::BlockType::FixedHuffman)
<< "got type: " << static_cast<int>(header->type);

constexpr auto no_compression = huffman::byte_array(0b001);
huffman::bit_span no_compression_span{no_compression};
header = detail::read_header(no_compression_span);
expect(header.has_value())
<< "got error: " << static_cast<int>(header.error());
expect(header->final);
expect(header->type == detail::BlockType::NoCompression)
<< "got type: " << static_cast<int>(header->type);
};

test("no compression") = [] {
constexpr auto compressed = huffman::byte_array(
0b001,
5,
0, // len = 5
~5,
~0, // nlen = 5
'h',
'e',
'l',
'l',
'o');

const auto expected = byte_vector('h', 'e', 'l', 'l', 'o');

const auto actual = decompress(compressed);
expect(fatal(actual.has_value()))
<< "got error code: " << static_cast<std::int32_t>(actual.error());
expect(fatal(actual->size() == expected.size()));
expect(*actual == expected);
};

test("fixed huffman") = [] {
constexpr auto compressed = huffman::byte_array(0b101);
const auto actual = decompress(compressed);
expect(not actual.has_value());
};

test("dynamic huffman") = [] {
constexpr auto compressed = huffman::byte_array(0b011);
const auto actual = decompress(compressed);
expect(not actual.has_value());
};
};

0 comments on commit 6790495

Please sign in to comment.