From 67904958557a2f6743c6fcc49541bf643a26fc5c Mon Sep 17 00:00:00 2001 From: Gary Miguel Date: Wed, 18 Oct 2023 20:20:50 +0000 Subject: [PATCH] decompress block type 00 - no compression Change-Id: I5ceb11f5b6ba0ef63e250757747dab79c7958653 --- .vscode/launch.json | 36 ++++++++++++++++ .vscode/tasks.json | 18 ++++++++ huffman/src/bit_span.hpp | 9 ++++ huffman/test/decode_test.cpp | 16 +++---- src/BUILD.bazel | 2 + src/decompress.cpp | 32 ++++++++++++++ src/decompress.hpp | 75 +++++++++++++++++++++++++++++++-- src/test/BUILD.bazel | 1 + src/test/decompress_test.cpp | 82 +++++++++++++++++++++++++++++++++++- 9 files changed, 258 insertions(+), 13 deletions(-) create mode 100644 .vscode/launch.json create mode 100644 .vscode/tasks.json create mode 100644 src/decompress.cpp diff --git a/.vscode/launch.json b/.vscode/launch.json new file mode 100644 index 0000000..8e9ba8c --- /dev/null +++ b/.vscode/launch.json @@ -0,0 +1,36 @@ +{ + // Use IntelliSense to learn about possible attributes. + // Hover to view descriptions of existing attributes. + // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 + "version": "0.2.0", + "configurations": [ + { + "name": "lldb bit_span_test", + "type": "lldb", + "request": "launch", + "program": "${workspaceFolder}/bazel-bin/huffman/test/bit_span_test", + "cwd": "${workspaceFolder}", + // necessary for debugging binaries built by bazel + // see: + // https://github.com/vadimcn/codelldb/wiki/Breakpoints-are-not-getting-hit#source-file-path-mismatch + "sourceMap": { + "/proc/self/cwd": "${workspaceFolder}" + }, + "preLaunchTask": "build-debug", + }, + { + "name": "lldb decompress_test", + "type": "lldb", + "request": "launch", + "program": "${workspaceFolder}/bazel-bin/src/test/decompress_test", + "cwd": "${workspaceFolder}", + // necessary for debugging binaries built by bazel + // see: + // https://github.com/vadimcn/codelldb/wiki/Breakpoints-are-not-getting-hit#source-file-path-mismatch + "sourceMap": { + "/proc/self/cwd": "${workspaceFolder}" + }, + "preLaunchTask": "build-debug", + } + ] +} diff --git a/.vscode/tasks.json b/.vscode/tasks.json new file mode 100644 index 0000000..017b1b7 --- /dev/null +++ b/.vscode/tasks.json @@ -0,0 +1,18 @@ +{ + "tasks": [ + { + "args": [ + "build", + "-c", + "dbg", + "//..." + ], + "command": "bazel", + "group": "build", + "label": "build-debug", + "problemMatcher": [], + "type": "shell" + } + ], + "version": "2.0.0" +} diff --git a/huffman/src/bit_span.hpp b/huffman/src/bit_span.hpp index 4665a7f..b53720e 100644 --- a/huffman/src/bit_span.hpp +++ b/huffman/src/bit_span.hpp @@ -164,5 +164,14 @@ class bit_span : public std::ranges::view_interface consume(CHAR_BIT - bit_offset_); } } + + /// Returns a pointer to the underlying data. + /// @pre *this aligned to a byte boundary. + [[nodiscard]] + constexpr auto byte_data() const -> const std::byte* + { + assert(bit_offset_ == 0 and "bit_span must be byte aligned to access data"); + return data_; + } }; } // namespace starflate::huffman diff --git a/huffman/test/decode_test.cpp b/huffman/test/decode_test.cpp index ca41027..b7e3d7c 100644 --- a/huffman/test/decode_test.cpp +++ b/huffman/test/decode_test.cpp @@ -7,12 +7,12 @@ #include #include -constexpr auto reverse_bits(std::byte b) -> std::byte +constexpr auto reverse_bits(int b) -> std::byte { std::byte result{}; for (auto i = 0; i < CHAR_BIT; ++i) { result <<= 1; - result |= std::byte{(b & std::byte{1}) == std::byte{1}}; + result |= std::byte{(b & 1) == 1}; b >>= 1; } return result; @@ -30,12 +30,12 @@ auto main() -> int // encoded data from soxofaan/dahuffman readme.rst. // We reverse the bits in each byte to match the encoding used in DEFLATE. constexpr std::array encoded_bytes = { - reverse_bits(std::byte{134}), - reverse_bits(std::byte{124}), - reverse_bits(std::byte{37}), - reverse_bits(std::byte{19}), - reverse_bits(std::byte{105}), - reverse_bits(std::byte{64})}; + reverse_bits(134), + reverse_bits(124), + reverse_bits(37), + reverse_bits(19), + reverse_bits(105), + reverse_bits(64)}; constexpr char eot = {'\4'}; static constexpr auto code_table = // clang-format off diff --git a/src/BUILD.bazel b/src/BUILD.bazel index b447bf4..9616c75 100644 --- a/src/BUILD.bazel +++ b/src/BUILD.bazel @@ -4,5 +4,7 @@ package(default_visibility = ["//src:__subpackages__"]) cc_library( name = "decompress", + srcs = ["decompress.cpp"], hdrs = ["decompress.hpp"], + deps = ["//huffman"], ) diff --git a/src/decompress.cpp b/src/decompress.cpp new file mode 100644 index 0000000..608c4dd --- /dev/null +++ b/src/decompress.cpp @@ -0,0 +1,32 @@ +#include "decompress.hpp" + +#include "huffman/huffman.hpp" + +#include + +namespace starflate::detail { + +auto valid(BlockType type) -> bool +{ + using enum BlockType; + return type == NoCompression || type == FixedHuffman || + type == DynamicHuffman; +} + +auto read_header(huffman::bit_span& compressed_bits) + -> std::expected +{ + if (std::ranges::size(compressed_bits) < 3) { + return std::unexpected{DecompressError::InvalidBlockHeader}; + } + auto type = static_cast( + std::uint8_t{static_cast(compressed_bits[1])} | + (std::uint8_t{static_cast(compressed_bits[2])} << 1)); + if (not valid(type)) { + return std::unexpected{DecompressError::InvalidBlockHeader}; + } + bool final{static_cast(compressed_bits[0])}; + compressed_bits.consume(3); + return BlockHeader{final, type}; +} +} // namespace starflate::detail diff --git a/src/decompress.hpp b/src/decompress.hpp index 9a67ce9..c989b77 100644 --- a/src/decompress.hpp +++ b/src/decompress.hpp @@ -1,5 +1,8 @@ #pragma once +#include "huffman/huffman.hpp" + +#include #include #include #include @@ -10,19 +13,85 @@ namespace starflate { // error code enum -enum class Error : std::uint8_t +enum class DecompressError : std::uint8_t { Error, + InvalidBlockHeader, + NoCompressionLenMismatch, +}; + +namespace detail { + +enum class BlockType : std::uint8_t +{ + NoCompression, + FixedHuffman, + DynamicHuffman, +}; + +struct BlockHeader +{ + bool final; + BlockType type; }; +auto read_header(huffman::bit_span& compressed_bits) + -> std::expected; +} // namespace detail + +using namespace huffman::literals; + // Inspired by https://docs.python.org/3/library/zlib.html#zlib.decompress template > auto decompress( - [[maybe_unused]] std::span compressed, - ByteAllocator alloc = {}) -> std::expected, Error> + std::span compressed, ByteAllocator alloc = {}) + -> std::expected, DecompressError> { + + using enum detail::BlockType; auto decompressed = std::vector(alloc); + + huffman::bit_span compressed_bits{compressed}; + while (true) { + const auto header = detail::read_header(compressed_bits); + if (not header) { + return std::unexpected{header.error()}; + } + if (header->type == NoCompression) { // no compression + // Any bits of input up to the next byte boundary are ignored. + compressed_bits.consume_to_byte_boundary(); + const std::uint16_t len = compressed_bits.pop_16(); + const std::uint16_t nlen = compressed_bits.pop_16(); + if (len != static_cast(~nlen)) { + return std::unexpected{DecompressError::NoCompressionLenMismatch}; + } + assert(compressed_bits.size() >= std::size_t{len} * CHAR_BIT and + "not enough bits"); + + // TODO: this is probably really slow because back_inserter means we can + // only copy a single byte at a time. We should look into options for bulk + // copying. + std::copy_n( + compressed_bits.byte_data(), len, std::back_inserter(decompressed)); + compressed_bits.consume(CHAR_BIT * len); + } else { + // TODO: implement + return std::unexpected{DecompressError::Error}; + } + if (header->final) { + break; + } + } return decompressed; } +template < + std::ranges::contiguous_range R, + class ByteAllocator = std::allocator> + requires std::same_as, std::byte> +auto decompress(const R& compressed, ByteAllocator alloc = {}) +{ + return decompress(std::span{compressed.data(), compressed.size()}, alloc); +} + } // namespace starflate diff --git a/src/test/BUILD.bazel b/src/test/BUILD.bazel index 6b44e93..a2c17f7 100644 --- a/src/test/BUILD.bazel +++ b/src/test/BUILD.bazel @@ -7,5 +7,6 @@ cc_test( deps = [ "//:boost_ut", "//src:decompress", + "@boost_ut", ], ) diff --git a/src/test/decompress_test.cpp b/src/test/decompress_test.cpp index 55d2ff9..851994f 100644 --- a/src/test/decompress_test.cpp +++ b/src/test/decompress_test.cpp @@ -1,6 +1,84 @@ +#include "huffman/src/utility.hpp" #include "src/decompress.hpp" -auto main() -> int +#include + +#include + +template +constexpr auto byte_vector(Ts... values) { - return 0; + return std::vector{std::byte(values)...}; } + +auto main() -> int +{ + using ::boost::ut::eq; + using ::boost::ut::expect; + using ::boost::ut::fatal; + using ::boost::ut::test; + using namespace starflate; + + test("read_header") = [] -> void { + huffman::bit_span empty{nullptr, 0, 0}; + expect(detail::read_header(empty).error() == + DecompressError::InvalidBlockHeader); + + constexpr auto bad_block_type = huffman::byte_array(0b111); + huffman::bit_span bad_block_type_span{bad_block_type}; + expect(detail::read_header(bad_block_type_span).error() == + DecompressError::InvalidBlockHeader); + + constexpr auto fixed = huffman::byte_array(0b010); + huffman::bit_span fixed_span{fixed}; + auto header = detail::read_header(fixed_span); + expect(header.has_value()) + << "got error: " << static_cast(header.error()); + expect(not header->final); + expect(header->type == detail::BlockType::FixedHuffman) + << "got type: " << static_cast(header->type); + + constexpr auto no_compression = huffman::byte_array(0b001); + huffman::bit_span no_compression_span{no_compression}; + header = detail::read_header(no_compression_span); + expect(header.has_value()) + << "got error: " << static_cast(header.error()); + expect(header->final); + expect(header->type == detail::BlockType::NoCompression) + << "got type: " << static_cast(header->type); + }; + + test("no compression") = [] { + constexpr auto compressed = huffman::byte_array( + 0b001, + 5, + 0, // len = 5 + ~5, + ~0, // nlen = 5 + 'h', + 'e', + 'l', + 'l', + 'o'); + + const auto expected = byte_vector('h', 'e', 'l', 'l', 'o'); + + const auto actual = decompress(compressed); + expect(fatal(actual.has_value())) + << "got error code: " << static_cast(actual.error()); + expect(fatal(actual->size() == expected.size())); + expect(*actual == expected); + }; + + test("fixed huffman") = [] { + constexpr auto compressed = huffman::byte_array(0b101); + const auto actual = decompress(compressed); + expect(not actual.has_value()); + }; + + test("dynamic huffman") = [] { + constexpr auto compressed = huffman::byte_array(0b011); + const auto actual = decompress(compressed); + expect(not actual.has_value()); + }; +};