Skip to content

Commit

Permalink
Caller passes in dst to decompress (#125)
Browse files Browse the repository at this point in the history
This avoids allocation inside decompress.

Also:
* Doxygen style comments for public interface.
* Move decompress implementation to .cpp since it is no longer
  templated.
* Move checking of final to the top of the loop.

Change-Id: I4afa1e306c5d20c1a83d3173292b77ab6e8fc01c
  • Loading branch information
garymm authored Jan 14, 2024
1 parent c1636cd commit 2e6a83d
Show file tree
Hide file tree
Showing 3 changed files with 123 additions and 77 deletions.
61 changes: 56 additions & 5 deletions src/decompress.cpp
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
#include "decompress.hpp"

#include "huffman/huffman.hpp"
#include <cstdint>
#include <iterator>
#include <utility>

#include <expected>

namespace starflate::detail {
namespace starflate {
namespace detail {

auto valid(BlockType type) -> bool
{
Expand All @@ -29,4 +30,54 @@ auto read_header(huffman::bit_span& compressed_bits)
compressed_bits.consume(3);
return BlockHeader{final, type};
}
} // namespace starflate::detail

} // namespace detail

auto decompress(std::span<const std::byte> src, std::span<std::byte> dst)
-> std::expected<DecompressResult, DecompressError>
{
using enum detail::BlockType;

huffman::bit_span src_bits{src};
std::size_t dst_written{};
for (bool was_final = false; not was_final;) {
const auto header = detail::read_header(src_bits);
if (not header) {
return std::unexpected{header.error()};
}
was_final = header->final;
if (header->type == NoCompression) { // no compression
// Any bits of input up to the next byte boundary are ignored.
src_bits.consume_to_byte_boundary();
const std::uint16_t len = src_bits.pop_16();
const std::uint16_t nlen = src_bits.pop_16();
if (len != static_cast<std::uint16_t>(~nlen)) {
return std::unexpected{DecompressError::NoCompressionLenMismatch};
}
// TODO: should we return an error instead of assert?
assert(
std::cmp_greater_equal(
src_bits.size(), std::size_t{len} * CHAR_BIT) and
"not enough bits in src");

if (std::ranges::size(dst) < len) {
return DecompressResult{src, dst_written, len};
}

std::copy_n(src_bits.byte_data(), len, dst.begin());
src_bits.consume(CHAR_BIT * len);
dst = dst.subspan(len);
dst_written += len;
} else {
// TODO: implement
return std::unexpected{DecompressError::Error};
}
const auto distance =
std::distance(std::ranges::data(src), src_bits.byte_data());
assert(distance >= 0 and "distance must be positive");
src = src.subspan(static_cast<size_t>(distance));
}
return DecompressResult{src, dst_written, 0};
}

} // namespace starflate
79 changes: 24 additions & 55 deletions src/decompress.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,9 @@
#include "huffman/huffman.hpp"

#include <cstddef>
#include <cstdint>
#include <expected>
#include <memory>
#include <ranges>
#include <span>
#include <vector>

namespace starflate {

Expand Down Expand Up @@ -38,63 +36,34 @@ auto read_header(huffman::bit_span& compressed_bits)
-> std::expected<BlockHeader, DecompressError>;
} // namespace detail

using namespace huffman::literals;

// Inspired by https://docs.python.org/3/library/zlib.html#zlib.decompress
template <std::size_t N, class ByteAllocator = std::allocator<std::byte>>
auto decompress(
std::span<const std::byte, N> compressed, ByteAllocator alloc = {})
-> std::expected<std::vector<std::byte, ByteAllocator>, DecompressError>
/// The result of decompress.
///
struct DecompressResult
{
std::span<const std::byte> remaining_src; ///< Remaining source data after
///< decompression.
std::size_t dst_written; ///< Number of bytes written to dst.
std::size_t min_next_dst_size; ///< Minimum number of bytes required in dst
///< for the next decompression. This is only
///< enough space for decompression of a
///< single block
};

using enum detail::BlockType;
auto decompressed = std::vector<std::byte, ByteAllocator>(alloc);

huffman::bit_span compressed_bits{compressed};
while (true) {
const auto header = detail::read_header(compressed_bits);
if (not header) {
return std::unexpected{header.error()};
}
if (header->type == NoCompression) { // no compression
// Any bits of input up to the next byte boundary are ignored.
compressed_bits.consume_to_byte_boundary();
const std::uint16_t len = compressed_bits.pop_16();
const std::uint16_t nlen = compressed_bits.pop_16();
if (len != static_cast<std::uint16_t>(~nlen)) {
return std::unexpected{DecompressError::NoCompressionLenMismatch};
}
assert(
std::cmp_greater_equal(
compressed_bits.size(), std::size_t{len} * CHAR_BIT) and
"not enough bits");

// Using vector::insert instead of std::copy to allow for bulk copying.
decompressed.insert(
decompressed.end(),
compressed_bits.byte_data(),
compressed_bits
.byte_data() + // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic)
len);
compressed_bits.consume(CHAR_BIT * len);
} else {
// TODO: implement
return std::unexpected{DecompressError::Error};
}
if (header->final) {
break;
}
}
return decompressed;
}
/// Decompresses the given source data into the destination buffer.
///
/// @param src The source data to decompress.
/// @param dst The destination buffer to store the decompressed data.
/// @return An expected value containing the decompression result if successful,
/// or an error code if failed.
///
auto decompress(std::span<const std::byte> src, std::span<std::byte> dst)
-> std::expected<DecompressResult, DecompressError>;

template <
std::ranges::contiguous_range R,
class ByteAllocator = std::allocator<std::byte>>
template <std::ranges::contiguous_range R>
requires std::same_as<std::ranges::range_value_t<R>, std::byte>
auto decompress(const R& compressed, ByteAllocator alloc = {})
auto decompress(const R& src, std::span<std::byte> dst)
{
return decompress(std::span{compressed.data(), compressed.size()}, alloc);
return decompress(std::span{src.data(), src.size()}, dst);
}

} // namespace starflate
60 changes: 43 additions & 17 deletions src/test/decompress_test.cpp
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
#include "huffman/huffman.hpp"
#include "huffman/src/utility.hpp"
#include "src/decompress.hpp"
#include "tools/cpp/runfiles/runfiles.h"

#include <boost/ut.hpp>

#include <array>
#include <fstream>
#include <iterator>
#include <memory>
Expand Down Expand Up @@ -83,24 +85,48 @@ auto main(int, char* argv[]) -> int

test("no compression") = [] {
constexpr auto compressed = huffman::byte_array(
0b001,
5,
0, // len = 5
~5,
~0, // nlen = 5
'h',
0b000, // no compression, not final
4,
0, // len = 4
~4,
~0, // nlen = 4
'r',
'o',
's',
'e',
'l',
'l',
'o');

const auto expected = byte_vector('h', 'e', 'l', 'l', 'o');

const auto actual = decompress(compressed);
expect(fatal(actual.has_value()))
<< "got error code: " << static_cast<std::int32_t>(actual.error());
expect(fatal(actual->size() == expected.size()));
expect(*actual == expected);
0b001, // no compression, final
3,
0, // len = 3
~3,
~0, // nlen = 3
'b',
'u',
'd');
std::span<const std::byte> src{compressed};

constexpr auto expected_0 = huffman::byte_array('r', 'o', 's', 'e');
constexpr auto expected_1 = huffman::byte_array('b', 'u', 'd');
const std::array<std::span<const std::byte>, 2> expecteds{
expected_0, expected_1};

std::array<std::byte, 4> dst_array{};
const std::span<std::byte> dst{dst_array};
for (std::size_t i = 0; i < expecteds.size(); ++i) {
const auto result = decompress(src, dst);
expect(result.has_value())
<< "got error code: " << static_cast<std::int32_t>(result.error());
if (i == 0) {
expect(not result->remaining_src.empty());
expect(result->min_next_dst_size == expecteds.at(1).size());
} else {
expect(result->remaining_src.empty());
expect(result->min_next_dst_size == 0);
}
const auto expected = expecteds.at(i);
expect(result->dst_written == expected.size());
expect(std::ranges::equal(dst.subspan(0, expected.size()), expected));
src = result->remaining_src;
}
};

test("fixed huffman") = [argv] {
Expand Down

0 comments on commit 2e6a83d

Please sign in to comment.