Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Caller passes in dst to decompress #125

Merged
merged 1 commit into from
Jan 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
61 changes: 56 additions & 5 deletions src/decompress.cpp
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
#include "decompress.hpp"

#include "huffman/huffman.hpp"
#include <cstdint>
#include <iterator>
#include <utility>

#include <expected>

namespace starflate::detail {
namespace starflate {
namespace detail {

auto valid(BlockType type) -> bool
{
Expand All @@ -29,4 +30,54 @@
compressed_bits.consume(3);
return BlockHeader{final, type};
}
} // namespace starflate::detail

} // namespace detail

auto decompress(std::span<const std::byte> src, std::span<std::byte> dst)
-> std::expected<DecompressResult, DecompressError>
{
using enum detail::BlockType;

huffman::bit_span src_bits{src};
std::size_t dst_written{};
for (bool was_final = false; not was_final;) {
const auto header = detail::read_header(src_bits);
oliverlee marked this conversation as resolved.
Show resolved Hide resolved
if (not header) {
return std::unexpected{header.error()};
}

Check warning on line 47 in src/decompress.cpp

View check run for this annotation

Codecov / codecov/patch

src/decompress.cpp#L46-L47

Added lines #L46 - L47 were not covered by tests
was_final = header->final;
if (header->type == NoCompression) { // no compression
// Any bits of input up to the next byte boundary are ignored.
src_bits.consume_to_byte_boundary();
const std::uint16_t len = src_bits.pop_16();
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we know the input is large enough to store this?

Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Guessing you mean "Do we know src_bits.size() >= 16"?

bit_span.pop() asserts that, but we should probably handle it here, in the same way that we handle the "not enough bits in src" issue below. I'll do that in another PR, this one is getting a little big.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yeah exactly

const std::uint16_t nlen = src_bits.pop_16();
if (len != static_cast<std::uint16_t>(~nlen)) {
return std::unexpected{DecompressError::NoCompressionLenMismatch};
}

Check warning on line 56 in src/decompress.cpp

View check run for this annotation

Codecov / codecov/patch

src/decompress.cpp#L55-L56

Added lines #L55 - L56 were not covered by tests
// TODO: should we return an error instead of assert?
oliverlee marked this conversation as resolved.
Show resolved Hide resolved
assert(
std::cmp_greater_equal(
src_bits.size(), std::size_t{len} * CHAR_BIT) and
"not enough bits in src");

if (std::ranges::size(dst) < len) {
return DecompressResult{src, dst_written, len};
}

std::copy_n(src_bits.byte_data(), len, dst.begin());
src_bits.consume(CHAR_BIT * len);
dst = dst.subspan(len);
dst_written += len;
} else {
// TODO: implement
return std::unexpected{DecompressError::Error};
}

Check warning on line 74 in src/decompress.cpp

View check run for this annotation

Codecov / codecov/patch

src/decompress.cpp#L73-L74

Added lines #L73 - L74 were not covered by tests
const auto distance =
std::distance(std::ranges::data(src), src_bits.byte_data());
assert(distance >= 0 and "distance must be positive");
src = src.subspan(static_cast<size_t>(distance));
garymm marked this conversation as resolved.
Show resolved Hide resolved
}
return DecompressResult{src, dst_written, 0};
}

} // namespace starflate
79 changes: 24 additions & 55 deletions src/decompress.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,9 @@
#include "huffman/huffman.hpp"

#include <cstddef>
#include <cstdint>
#include <expected>
#include <memory>
#include <ranges>
#include <span>
#include <vector>

namespace starflate {

Expand Down Expand Up @@ -38,63 +36,34 @@ auto read_header(huffman::bit_span& compressed_bits)
-> std::expected<BlockHeader, DecompressError>;
} // namespace detail

using namespace huffman::literals;

// Inspired by https://docs.python.org/3/library/zlib.html#zlib.decompress
template <std::size_t N, class ByteAllocator = std::allocator<std::byte>>
auto decompress(
std::span<const std::byte, N> compressed, ByteAllocator alloc = {})
-> std::expected<std::vector<std::byte, ByteAllocator>, DecompressError>
/// The result of decompress.
///
struct DecompressResult
{
std::span<const std::byte> remaining_src; ///< Remaining source data after
///< decompression.
std::size_t dst_written; ///< Number of bytes written to dst.
std::size_t min_next_dst_size; ///< Minimum number of bytes required in dst
///< for the next decompression. This is only
///< enough space for decompression of a
///< single block
};

using enum detail::BlockType;
auto decompressed = std::vector<std::byte, ByteAllocator>(alloc);

huffman::bit_span compressed_bits{compressed};
while (true) {
const auto header = detail::read_header(compressed_bits);
if (not header) {
return std::unexpected{header.error()};
}
if (header->type == NoCompression) { // no compression
// Any bits of input up to the next byte boundary are ignored.
compressed_bits.consume_to_byte_boundary();
const std::uint16_t len = compressed_bits.pop_16();
const std::uint16_t nlen = compressed_bits.pop_16();
if (len != static_cast<std::uint16_t>(~nlen)) {
return std::unexpected{DecompressError::NoCompressionLenMismatch};
}
assert(
std::cmp_greater_equal(
compressed_bits.size(), std::size_t{len} * CHAR_BIT) and
"not enough bits");

// Using vector::insert instead of std::copy to allow for bulk copying.
decompressed.insert(
decompressed.end(),
compressed_bits.byte_data(),
compressed_bits
.byte_data() + // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic)
len);
compressed_bits.consume(CHAR_BIT * len);
} else {
// TODO: implement
return std::unexpected{DecompressError::Error};
}
if (header->final) {
break;
}
}
return decompressed;
}
/// Decompresses the given source data into the destination buffer.
///
/// @param src The source data to decompress.
/// @param dst The destination buffer to store the decompressed data.
/// @return An expected value containing the decompression result if successful,
/// or an error code if failed.
///
auto decompress(std::span<const std::byte> src, std::span<std::byte> dst)
-> std::expected<DecompressResult, DecompressError>;

template <
std::ranges::contiguous_range R,
class ByteAllocator = std::allocator<std::byte>>
template <std::ranges::contiguous_range R>
requires std::same_as<std::ranges::range_value_t<R>, std::byte>
auto decompress(const R& compressed, ByteAllocator alloc = {})
auto decompress(const R& src, std::span<std::byte> dst)
{
return decompress(std::span{compressed.data(), compressed.size()}, alloc);
return decompress(std::span{src.data(), src.size()}, dst);
}

} // namespace starflate
60 changes: 43 additions & 17 deletions src/test/decompress_test.cpp
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
#include "huffman/huffman.hpp"
#include "huffman/src/utility.hpp"
#include "src/decompress.hpp"
#include "tools/cpp/runfiles/runfiles.h"

#include <boost/ut.hpp>

#include <array>
#include <fstream>
#include <iterator>
#include <memory>
Expand Down Expand Up @@ -83,24 +85,48 @@ auto main(int, char* argv[]) -> int

test("no compression") = [] {
constexpr auto compressed = huffman::byte_array(
0b001,
5,
0, // len = 5
~5,
~0, // nlen = 5
'h',
0b000, // no compression, not final
4,
0, // len = 4
~4,
~0, // nlen = 4
'r',
'o',
's',
'e',
'l',
'l',
'o');

const auto expected = byte_vector('h', 'e', 'l', 'l', 'o');

const auto actual = decompress(compressed);
expect(fatal(actual.has_value()))
<< "got error code: " << static_cast<std::int32_t>(actual.error());
expect(fatal(actual->size() == expected.size()));
expect(*actual == expected);
0b001, // no compression, final
3,
0, // len = 3
~3,
~0, // nlen = 3
'b',
'u',
'd');
std::span<const std::byte> src{compressed};

constexpr auto expected_0 = huffman::byte_array('r', 'o', 's', 'e');
constexpr auto expected_1 = huffman::byte_array('b', 'u', 'd');
const std::array<std::span<const std::byte>, 2> expecteds{
expected_0, expected_1};

std::array<std::byte, 4> dst_array{};
const std::span<std::byte> dst{dst_array};
for (std::size_t i = 0; i < expecteds.size(); ++i) {
const auto result = decompress(src, dst);
expect(result.has_value())
<< "got error code: " << static_cast<std::int32_t>(result.error());
if (i == 0) {
expect(not result->remaining_src.empty());
expect(result->min_next_dst_size == expecteds.at(1).size());
} else {
expect(result->remaining_src.empty());
expect(result->min_next_dst_size == 0);
}
const auto expected = expecteds.at(i);
expect(result->dst_written == expected.size());
expect(std::ranges::equal(dst.subspan(0, expected.size()), expected));
src = result->remaining_src;
}
};

test("fixed huffman") = [argv] {
Expand Down
Loading