Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

string: add length management functions #243

Open
wants to merge 10 commits into
base: master
Choose a base branch
from
137 changes: 111 additions & 26 deletions include/cista/containers/string.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
#include <string_view>

#include "cista/containers/ptr.h"
#include "cista/endian/detection.h"
#include "cista/exception.h"
#include "cista/type_traits.h"

Expand Down Expand Up @@ -75,9 +76,12 @@ struct generic_string {
friend CharT* end(generic_string& s) { return s.end(); }

bool is_short() const noexcept { return s_.is_short_; }
// True when the string is in heap mode and records a non-zero capacity,
// which is the condition reset() uses to decide whether to free the buffer.
bool is_self_allocated() const noexcept {
  if (is_short()) {
    return false;
  }
  return h_.capacity_ != 0;
}

void reset() noexcept {
if (!h_.is_short_ && h_.ptr_ != nullptr && h_.self_allocated_) {
if (is_self_allocated()) {
std::free(data());
}
h_ = heap{};
Expand All @@ -100,21 +104,11 @@ struct generic_string {
if (str == nullptr || len == 0U) {
return;
}
s_.is_short_ = (len <= short_length_limit);
if (s_.is_short_) {
std::memcpy(s_.s_, str, len * sizeof(CharT));
for (auto i = len; i < short_length_limit; ++i) {
s_.s_[i] = 0;
}
} else {
h_.ptr_ = static_cast<CharT*>(std::malloc(len * sizeof(CharT)));
if (h_.ptr_ == nullptr) {
throw_exception(std::bad_alloc{});
}
internal_change_capacity(len);
if (!is_short()) {
h_.size_ = len;
h_.self_allocated_ = true;
std::memcpy(data(), str, len * sizeof(CharT));
}
std::memcpy(data(), str, len * sizeof(CharT));
}

void set_non_owning(std::basic_string<CharT> const& v) {
Expand All @@ -137,8 +131,7 @@ struct generic_string {
return set_owning(str, len);
}

h_.is_short_ = false;
h_.self_allocated_ = false;
h_ = heap{};
h_.ptr_ = str;
h_.size_ = len;
}
Expand Down Expand Up @@ -167,13 +160,71 @@ struct generic_string {
reset();
if (s.is_short()) {
std::memcpy(static_cast<void*>(this), &s, sizeof(s));
} else if (s.h_.self_allocated_) {
} else if (s.is_self_allocated()) {
set_owning(s.data(), s.size());
} else {
set_non_owning(s.data(), s.size());
}
}

// Changes the storage of this string so that it can hold `new_capacity`
// characters, keeping the first min(size(), new_capacity) characters.
//
//  - new_capacity == 0: delegates to reset().
//  - new_capacity <= short_length_limit: content is moved into the inline
//    (stack) representation.
//  - otherwise: content ends up in a heap buffer obtained through
//    std::realloc; an already self-allocated buffer is resized in place,
//    any other source (inline or non-owning) is copied into a new buffer.
//
// Reports std::bad_alloc through throw_exception() when std::realloc fails.
void internal_change_capacity(msize_t new_capacity) {
// Copies `size` characters from `src` to `dest` (skipped when both are the
// same buffer or nothing is to be copied) and zero-fills the remaining
// `capacity - size` characters of `dest`.
auto initialize_buffer = [](CharT* dest, msize_t capacity, CharT const* src,
msize_t size) -> void {
if (size && dest != src) {
std::memcpy(dest, src, size * sizeof(CharT));
}
std::memset(dest + size, 0, (capacity - size) * sizeof(CharT));
};
// Builds a heap header whose buffer is (re)allocated from `cur_buf`;
// passing nullptr makes std::realloc act as a plain allocation.
auto make_heap = [](CharT* cur_buf, msize_t new_cap) -> heap {
// Round up to a multiple of 256 so the low byte of the capacity is zero.
// Presumably this keeps the byte shared with `is_short_` in the heap union
// at zero -- verify against the heap layout.
// NOTE(review): the addition looks like it can wrap around for requests
// within 255 of the maximum msize_t value -- confirm whether callers can
// reach that range.
new_cap = (new_cap + msize_t{0xFF}) & ~msize_t{0xFF};
heap h{};
// On non-little-endian builds the capacity is stored shifted right by 8
// bits; heap::capacity() shifts it back.
#ifdef CISTA_LITTLE_ENDIAN
h.capacity_ = new_cap;
#else
h.capacity_ = new_cap >> 8;
#endif
h.ptr_ =
static_cast<CharT*>(std::realloc(cur_buf, new_cap * sizeof(CharT)));
if (!h.ptr_) {
throw_exception(std::bad_alloc{});
}
return h;
};

if (new_capacity == 0) {
reset();
return;
}
// Number of characters that survive the change (truncates when shrinking).
msize_t new_size = std::min(size(), new_capacity);
if (new_capacity <= short_length_limit) {
// Build the inline representation in a temporary first: data() may point
// into the heap buffer that reset() is about to release.
stack s{};
initialize_buffer(s.s_, short_length_limit, data(), new_size);
if (!is_short()) {
reset();
}
s_ = s;
} else {
heap h{};
if (is_self_allocated()) {
// Resize the owned buffer; source and destination are the same pointer,
// so initialize_buffer only zero-fills the tail.
h = make_heap(data(), new_capacity);
initialize_buffer(const_cast<CharT*>(h.ptr()), h.capacity(), h.ptr(),
new_size);
} else {
// Inline or non-owning content: allocate a fresh buffer and copy into it.
// data() is read before h_ is overwritten below.
h = make_heap(nullptr, new_capacity);
initialize_buffer(const_cast<CharT*>(h.ptr()), h.capacity(), data(),
new_size);
}
h.size_ = new_size;
h_ = h;
}
}
// Capacity in characters: the fixed inline limit for short strings,
// otherwise the value reported by the heap header.
constexpr msize_t capacity() const noexcept {
  return is_short() ? short_length_limit : h_.capacity();
}

bool empty() const noexcept { return size() == 0U; }
std::basic_string_view<CharT> view() const noexcept {
return {data(), size()};
Expand Down Expand Up @@ -331,11 +382,7 @@ struct generic_string {
}

CharT const* internal_data() const noexcept {
if constexpr (std::is_pointer_v<Ptr>) {
return is_short() ? s_.s_ : h_.ptr_;
} else {
return is_short() ? s_.s_ : h_.ptr_.get();
}
return is_short() ? s_.s_ : h_.ptr();
}

CharT* data() noexcept { return const_cast<CharT*>(internal_data()); }
Expand All @@ -352,7 +399,7 @@ struct generic_string {
}

generic_string& erase(msize_t const pos, msize_t const n) {
if (!is_short() && !h_.self_allocated_) {
if (!is_short() && !is_self_allocated()) {
set_owning(view());
}
auto const size_before = size();
Expand Down Expand Up @@ -424,11 +471,27 @@ struct generic_string {
}

struct heap {
bool is_short_{false};
bool self_allocated_{false};
std::uint16_t __fill__{0};
union {
bool is_short_;
std::uint32_t capacity_{0};
};
std::uint32_t size_{0};
Ptr ptr_{nullptr};

std::uint32_t capacity() const noexcept {
#ifdef CISTA_LITTLE_ENDIAN
return capacity_;
#else
return capacity_ << 8;
#endif
}
CharT const* ptr() const noexcept {
if constexpr (std::is_pointer_v<Ptr>) {
return ptr_;
} else {
return ptr_.get();
}
}
};

struct stack {
Expand All @@ -448,6 +511,7 @@ struct generic_string {
template <typename Ptr>
struct basic_string : public generic_string<Ptr> {
using base = generic_string<Ptr>;
using msize_t = typename base::msize_t;
using CharT = typename base::CharT;

using base::base;
Expand Down Expand Up @@ -496,6 +560,27 @@ struct basic_string : public generic_string<Ptr> {
base::set_owning(s);
return *this;
}

void resize(msize_t new_size) {
if (new_size <= base::short_length_limit) {
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does this mean that the allocation will be freed if the string was heap-allocated (i.e., it was longer than the short length limit at some point)?

I am thinking about use cases like the following one, where you want the string to allocate a few times until it reaches a size that never requires another allocation.

std::string s;
while (std::getline(s)) { /* ... */ }

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see, that is likely not the best decision. I will look into it.

base::internal_change_capacity(new_size);
return;
}
if (new_size > base::capacity()) {
base::internal_change_capacity(new_size);
}
if (new_size < base::h_.size_) {
std::memset(base::data() + new_size, 0,
(base::h_.size_ - new_size) * sizeof(CharT));
}
base::h_.size_ = new_size;
}
// Grows the storage to `cap` characters; a request that does not exceed the
// current capacity is a no-op.
void reserve(msize_t cap) {
  if (cap <= base::capacity()) {
    return;
  }
  base::internal_change_capacity(cap);
}
// Requests a capacity equal to the current size.
void shrink_to_fit() {
  auto const used = base::size();
  base::internal_change_capacity(used);
}
};

template <typename Ptr>
Expand Down
5 changes: 2 additions & 3 deletions include/cista/serialization.h
Original file line number Diff line number Diff line change
Expand Up @@ -236,7 +236,7 @@ void serialize(Ctx& c, generic_string<Ptr> const* origin, offset_t const pos) {
: start - cista_member_offset(Type, h_.ptr_) - pos));
c.write(pos + cista_member_offset(Type, h_.size_),
convert_endian<Ctx::MODE>(origin->h_.size_));
c.write(pos + cista_member_offset(Type, h_.self_allocated_), false);
c.write(pos + cista_member_offset(Type, h_.capacity_), std::uint32_t{0});
}

template <typename Ctx, typename T, typename SizeType,
Expand Down Expand Up @@ -829,8 +829,7 @@ void check_state(Ctx const& c, generic_string<Ptr>* el) {
if (!el->is_short()) {
c.check_ptr(el->h_.ptr_,
el->h_.size_ * sizeof(typename generic_string<Ptr>::CharT));
c.check_bool(el->h_.self_allocated_);
c.require(!el->h_.self_allocated_, "string self-allocated");
c.require(!el->is_self_allocated(), "string self-allocated");
c.require((el->h_.size_ == 0) == (el->h_.ptr_ == nullptr),
"str size=0 <=> ptr=0");
}
Expand Down
45 changes: 45 additions & 0 deletions test/cstring_serialize_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -203,3 +203,48 @@ TEST_CASE("u32string serialization endian long") {

CHECK(*serialized_be == U32STR_LONG_CORNER_CASE);
}

// Checks capacity() of deserialized strings and the effect of
// shrink_to_fit() on them, for every character width under test.
TEST_CASE_TEMPLATE("string serialization capacity", StrT, cista::raw::string,
                   u16string, u32string) {
  using CharT = typename StrT::CharT;
  // Selects the short fixture whose character width matches CharT. The
  // pointer goes through void const* so that all three branches compile for
  // every instantiation.
  auto get_short = []() -> CharT const* {
    // Initialized so that an unexpected character width cannot yield an
    // indeterminate pointer.
    void const* ptr = nullptr;
    switch (sizeof(CharT)) {
      case sizeof(char): ptr = SHORT_STR; break;
      case sizeof(char16_t): ptr = U16STR_SHORT; break;
      case sizeof(char32_t): ptr = U32STR_SHORT; break;
      default: break;
    }
    return static_cast<CharT const*>(ptr);
  };
  // Same selection for the long fixture.
  auto get_long = []() -> CharT const* {
    void const* ptr = nullptr;
    switch (sizeof(CharT)) {
      case sizeof(char): ptr = LONG_STR; break;
      case sizeof(char16_t): ptr = U16STR_LONG; break;
      case sizeof(char32_t): ptr = U32STR_LONG; break;
      default: break;
    }
    return static_cast<CharT const*>(ptr);
  };

  StrT s_s = get_short(), s_l = get_long();
  cista::byte_buf buf_s = cista::serialize(s_s), buf_l = cista::serialize(s_l);
  StrT *serialized_s = cista::deserialize<StrT>(buf_s),
       *serialized_l = cista::deserialize<StrT>(buf_l);
  // Remember the data pointers to detect whether shrink_to_fit() relocates.
  CharT const *ptr_s = serialized_s->data(), *ptr_l = serialized_l->data();

  // A deserialized long string reports capacity 0 before it is modified.
  CHECK(serialized_s->capacity() == StrT::short_length_limit);
  CHECK(serialized_l->capacity() == 0);

  serialized_s->shrink_to_fit();
  serialized_l->shrink_to_fit();

  // The short string stays where it is; the long string is expected to move
  // to a different buffer with capacity 256 while keeping its content.
  CHECK(serialized_s->capacity() == StrT::short_length_limit);
  CHECK(serialized_l->capacity() == 256);
  CHECK(ptr_s == serialized_s->data());
  CHECK(ptr_l != serialized_l->data());
  CHECK(*serialized_s == get_short());
  CHECK(*serialized_l == get_long());

  // Explicit destructor calls; presumably needed because the objects live
  // inside the byte buffers and the long string now owns a buffer -- verify.
  serialized_s->~StrT();
  serialized_l->~StrT();
}
Loading
Loading