Skip to content

Commit

Permalink
Stop using null terminators on JSON (de)serialization. (#5353)
Browse files Browse the repository at this point in the history
[SC-58051](https://app.shortcut.com/tiledb-inc/story/58051/stop-using-null-terminators-on-json-de-serialization)

This PR updates the JSON deserialization code to explicitly pass the
message's length to capnproto, instead of appending a null terminator
and relying on it to get the length. Correspondingly, it removes the
null terminator in serialized JSON messages, which is technically a
behavior breaking change but the null terminator gets already removed by
both [the Go
API](https://github.com/TileDB-Inc/TileDB-Go/blob/9ec3a9462135bf6005dcc06a1f44a6dc24afecba/buffer.go#L105-L107)
and the REST server (see story), and the serialization APIs are
documented as being for internal use.

---
TYPE: NO_HISTORY
  • Loading branch information
teo-tsirpanis authored Nov 26, 2024
1 parent 9f0acfa commit 414a261
Show file tree
Hide file tree
Showing 19 changed files with 127 additions and 203 deletions.
9 changes: 4 additions & 5 deletions test/regression/targets/sc-18250.cc
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

#include <test/support/tdb_catch.h>

static const char* schema_str = R"rstr(
static constexpr std::string_view schema_str = R"rstr(
{"arrayType":"dense","attributes":[{
"cellValNum":1,"compressor":"NO_COMPRESSION",
"compressorLevel":-1,"name":"a1","type":"INT32"}],
Expand Down Expand Up @@ -33,11 +33,10 @@ TEST_CASE(
status = tiledb_buffer_alloc(ctx, &buf);
REQUIRE(status == TILEDB_OK);

status =
tiledb_buffer_set_data(ctx, buf, (void*)schema_str, sizeof(schema_str));
status = tiledb_buffer_set_data(
ctx, buf, (void*)schema_str.data(), schema_str.size());
REQUIRE(status == TILEDB_OK);

status = tiledb_deserialize_array_schema(
ctx, buf, tiledb_serialization_type_t(0), 0, &schema);
status = tiledb_deserialize_array_schema(ctx, buf, TILEDB_JSON, 0, &schema);
REQUIRE(status == TILEDB_OK);
}
38 changes: 36 additions & 2 deletions test/src/unit-cppapi-array.cc
Original file line number Diff line number Diff line change
Expand Up @@ -51,8 +51,9 @@ struct CPPArrayFx {
static const unsigned d1_tile = 10;
static const unsigned d2_tile = 5;

CPPArrayFx()
: ctx(vfs_test_setup_.ctx())
CPPArrayFx(shared_ptr<tiledb_config_t> config = nullptr)
: vfs_test_setup_(config.get())
, ctx(vfs_test_setup_.ctx())
, array_uri_{vfs_test_setup_.array_uri("cpp_unit_array")} {
Domain domain(ctx);
auto d1 = Dimension::create<int>(ctx, "d1", {{-100, 100}}, d1_tile);
Expand Down Expand Up @@ -83,6 +84,18 @@ struct CPPArrayFx {
std::string array_uri_;
};

struct CPPArrayFxJsonSerialization : public CPPArrayFx {
static Config create_config() {
Config result;
result.set("rest.server_serialization_format", "JSON");
return result;
}

CPPArrayFxJsonSerialization()
: CPPArrayFx(create_config().ptr()) {
}
};

TEST_CASE("Config", "[cppapi][config][non-rest]") {
// Primarily to instansiate operator[]/= template
tiledb::Config cfg;
Expand All @@ -92,6 +105,27 @@ TEST_CASE("Config", "[cppapi][config][non-rest]") {
CHECK((std::string)cfg["vfs.s3.use_virtual_addressing"] == "true");
}

TEST_CASE_METHOD(
CPPArrayFxJsonSerialization,
"C++ API: Arrays, REST JSON serialization",
"[cppapi][basic][rest][json]") {
SECTION("Dimensions") {
ArraySchema schema(ctx, array_uri_);
CHECK(schema.domain().ndim() == 2);
auto a = schema.domain().dimensions()[0].domain<int>();
auto b = schema.domain().dimensions()[1].domain<int>();
CHECK_THROWS(schema.domain().dimensions()[0].domain<unsigned>());
CHECK(a.first == -100);
CHECK(a.second == 100);
CHECK(b.first == 0);
CHECK(b.second == 100);
CHECK_THROWS(schema.domain().dimensions()[0].tile_extent<unsigned>() == 10);
CHECK(schema.domain().dimensions()[0].tile_extent<int>() == 10);
CHECK(schema.domain().dimensions()[1].tile_extent<int>() == 5);
CHECK(schema.domain().cell_num() == 20301);
}
}

TEST_CASE_METHOD(CPPArrayFx, "C++ API: Arrays", "[cppapi][basic][rest]") {
SECTION("Dimensions") {
ArraySchema schema(ctx, array_uri_);
Expand Down
16 changes: 0 additions & 16 deletions tiledb/sm/buffer/buffer.h
Original file line number Diff line number Diff line change
Expand Up @@ -556,22 +556,6 @@ class SerializationBuffer {
buffer_ = iter;
}

/**
* Assigns a new owned buffer to the object, and appends a null terminator.
*/
template <class Iter>
inline void assign_null_terminated(const Iter& iter) {
// Clear vector and deallocate its buffer.
buffer_owner_.clear();
buffer_owner_.shrink_to_fit();
// Reserve enough space for the data and the null terminator.
buffer_owner_.reserve(
std::distance(std::cbegin(iter), std::cend(iter)) + 1);
buffer_owner_.assign(std::cbegin(iter), std::cend(iter));
buffer_owner_.push_back('\0');
buffer_ = buffer_owner_;
}

/**
* Gets the number of bytes in the buffer.
*/
Expand Down
14 changes: 0 additions & 14 deletions tiledb/sm/buffer/test/unit_serialization_buffer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -84,20 +84,6 @@ TEST_CASE(
CHECK_THROWS(buff.owned_mutable_span());
}

TEST_CASE(
"SerializationBuffer: Test owned null-terminated buffer",
"[serialization_buffer][owned][null_terminated]") {
SerializationBuffer buff{
tiledb::test::get_test_memory_tracker()->get_resource(
MemoryType::SERIALIZATION_BUFFER)};
std::string_view data = "abcd";
buff.assign_null_terminated(data);
span<const char> buff_span = buff;
CHECK(buff.size() == data.size() + 1);
CHECK(memcmp(buff_span.data(), data.data(), data.size()) == 0);
CHECK(static_cast<span<const char>>(buff)[buff.size() - 1] == '\0');
}

TEST_CASE(
"SerializationBuffer: Test memory tracking",
"[serialization_buffer][memory_tracking]") {
Expand Down
35 changes: 0 additions & 35 deletions tiledb/sm/rest/rest_client_remote.cc
Original file line number Diff line number Diff line change
Expand Up @@ -217,10 +217,6 @@ RestClientRemote::get_array_schema_from_rest(const URI& uri) {
"Error getting array schema from REST; server returned no data.")),
nullopt};

// Ensure data has a null delimiter for cap'n proto if using JSON
RETURN_NOT_OK_TUPLE(
ensure_json_null_delimited_string(&returned_data), nullopt);

auto array_schema = serialization::array_schema_deserialize(
serialization_type_, returned_data, memory_tracker_);

Expand Down Expand Up @@ -271,8 +267,6 @@ RestClientRemote::post_array_schema_from_rest(
"Error getting array schema from REST; server returned no data.");
}

// Ensure data has a null delimiter for cap'n proto if using JSON
throw_if_not_ok(ensure_json_null_delimited_string(&returned_data));
return serialization::deserialize_load_array_schema_response(
uri, config, serialization_type_, returned_data, memory_tracker_);
}
Expand Down Expand Up @@ -355,8 +349,6 @@ void RestClientRemote::post_array_from_rest(
"Error getting array from REST; server returned no data.");
}

// Ensure data has a null delimiter for cap'n proto if using JSON
throw_if_not_ok(ensure_json_null_delimited_string(&returned_data));
serialization::array_deserialize(
array, serialization_type_, returned_data, resources, memory_tracker_);
}
Expand Down Expand Up @@ -489,9 +481,6 @@ Status RestClientRemote::get_array_non_empty_domain(
Status_RestError("Error getting array non-empty domain "
"from REST; server returned no data."));

// Ensure data has a null delimiter for cap'n proto if using JSON
RETURN_NOT_OK(ensure_json_null_delimited_string(&returned_data));

// Deserialize data returned
return serialization::nonempty_domain_deserialize(
array, returned_data, serialization_type_);
Expand Down Expand Up @@ -527,8 +516,6 @@ Status RestClientRemote::get_array_metadata_from_rest(
return LOG_STATUS(Status_RestError(
"Error getting array metadata from REST; server returned no data."));

// Ensure data has a null delimiter for cap'n proto if using JSON
RETURN_NOT_OK(ensure_json_null_delimited_string(&returned_data));
return serialization::metadata_deserialize(
array->unsafe_metadata(),
array->config(),
Expand Down Expand Up @@ -613,8 +600,6 @@ RestClientRemote::post_enumerations_from_rest(
"Error getting enumerations from REST; server returned no data.");
}

// Ensure data has a null delimiter for cap'n proto if using JSON
throw_if_not_ok(ensure_json_null_delimited_string(&returned_data));
return serialization::deserialize_load_enumerations_response(
array_schema, config, serialization_type_, returned_data, memory_tracker);
}
Expand Down Expand Up @@ -669,8 +654,6 @@ void RestClientRemote::post_query_plan_from_rest(
"Error getting query plan from REST; server returned no data.");
}

// Ensure data has a null delimiter for cap'n proto if using JSON
throw_if_not_ok(ensure_json_null_delimited_string(&returned_data));
query_plan = serialization::deserialize_query_plan_response(
query, serialization_type_, returned_data);
}
Expand Down Expand Up @@ -1191,8 +1174,6 @@ Status RestClientRemote::get_query_est_result_sizes(
"Error getting array metadata from REST; server returned no data."));
}

// Ensure data has a null delimiter for cap'n proto if using JSON
RETURN_NOT_OK(ensure_json_null_delimited_string(&returned_data));
return serialization::query_est_result_size_deserialize(
query, serialization_type_, true, returned_data);
}
Expand Down Expand Up @@ -1266,8 +1247,6 @@ Status RestClientRemote::post_fragment_info_from_rest(
return LOG_STATUS(Status_RestError(
"Error getting fragment info from REST; server returned no data."));

// Ensure data has a null delimiter for cap'n proto if using JSON
RETURN_NOT_OK(ensure_json_null_delimited_string(&returned_data));
return serialization::fragment_info_deserialize(
fragment_info, serialization_type_, uri, returned_data, memory_tracker_);
}
Expand Down Expand Up @@ -1308,8 +1287,6 @@ Status RestClientRemote::post_group_metadata_from_rest(
"Error getting group metadata from REST; server returned no data."));
}

// Ensure data has a null delimiter for cap'n proto if using JSON
RETURN_NOT_OK(ensure_json_null_delimited_string(&returned_data));
return serialization::metadata_deserialize(
group->unsafe_metadata(),
group->config(),
Expand Down Expand Up @@ -1407,8 +1384,6 @@ Status RestClientRemote::post_group_from_rest(const URI& uri, Group* group) {
return LOG_STATUS(Status_RestError(
"Error getting group from REST; server returned no data."));

// Ensure data has a null delimiter for cap'n proto if using JSON
RETURN_NOT_OK(ensure_json_null_delimited_string(&returned_data));
return serialization::group_details_deserialize(
group, serialization_type_, returned_data);
}
Expand Down Expand Up @@ -1458,14 +1433,6 @@ void RestClientRemote::delete_group_from_rest(const URI& uri, bool recursive) {
stats_, url, serialization_type_, &returned_data, cache_key));
}

Status RestClientRemote::ensure_json_null_delimited_string(Buffer* buffer) {
if (serialization_type_ == SerializationType::JSON &&
buffer->value<char>(buffer->size() - 1) != '\0') {
RETURN_NOT_OK(buffer->write("\0", sizeof(char)));
}
return Status::Ok();
}

Status RestClientRemote::post_consolidation_to_rest(
const URI& uri, const Config& config) {
BufferList serialized{memory_tracker_};
Expand Down Expand Up @@ -1545,8 +1512,6 @@ RestClientRemote::post_consolidation_plan_from_rest(
"Error getting query plan from REST; server returned no data.");
}

// Ensure data has a null delimiter for cap'n proto if using JSON
throw_if_not_ok(ensure_json_null_delimited_string(&returned_data));
return serialization::deserialize_consolidation_plan_response(
serialization_type_, returned_data);
}
Expand Down
9 changes: 0 additions & 9 deletions tiledb/sm/rest/rest_client_remote.h
Original file line number Diff line number Diff line change
Expand Up @@ -577,15 +577,6 @@ class RestClientRemote : public RestClient {
* @return Returns the redirection URI if exists and empty string otherwise
*/
std::string redirect_uri(const std::string& cache_key);

/**
* Cap'n proto requires JSON messages to be null terminated c-style strings.
* This function checks if using JSON that returned buffers are null delimited
*
* @param buffer of server message
* @return Status
*/
Status ensure_json_null_delimited_string(Buffer* buffer);
};

} // namespace tiledb::sm
20 changes: 6 additions & 14 deletions tiledb/sm/serialization/array.cc
Original file line number Diff line number Diff line change
Expand Up @@ -407,7 +407,7 @@ Status metadata_serialize(
case SerializationType::JSON: {
::capnp::JsonCodec json;
kj::String capnp_json = json.encode(builder);
serialized_buffer.assign_null_terminated(capnp_json);
serialized_buffer.assign(capnp_json);
break;
}
case SerializationType::CAPNP: {
Expand Down Expand Up @@ -447,12 +447,9 @@ Status metadata_deserialize(
try {
switch (serialize_type) {
case SerializationType::JSON: {
::capnp::JsonCodec json;
::capnp::MallocMessageBuilder message_builder;
auto builder = message_builder.initRoot<capnp::ArrayMetadata>();
json.decode(
kj::StringPtr(static_cast<const char*>(serialized_buffer.data())),
builder);
utils::decode_json_message(serialized_buffer, builder);
auto reader = builder.asReader();

// Deserialize
Expand Down Expand Up @@ -515,7 +512,7 @@ Status array_serialize(
case SerializationType::JSON: {
::capnp::JsonCodec json;
kj::String capnp_json = json.encode(ArrayBuilder);
serialized_buffer.assign_null_terminated(capnp_json);
serialized_buffer.assign(capnp_json);
break;
}
case SerializationType::CAPNP: {
Expand Down Expand Up @@ -551,13 +548,10 @@ void array_deserialize(
try {
switch (serialize_type) {
case SerializationType::JSON: {
::capnp::JsonCodec json;
::capnp::MallocMessageBuilder message_builder;
capnp::Array::Builder array_builder =
message_builder.initRoot<capnp::Array>();
json.decode(
kj::StringPtr(static_cast<const char*>(serialized_buffer.data())),
array_builder);
utils::decode_json_message(serialized_buffer, array_builder);
capnp::Array::Reader array_reader = array_builder.asReader();
array_from_capnp(array_reader, resources, array, true, memory_tracker);
break;
Expand Down Expand Up @@ -612,7 +606,7 @@ Status array_open_serialize(
case SerializationType::JSON: {
::capnp::JsonCodec json;
kj::String capnp_json = json.encode(arrayOpenBuilder);
serialized_buffer.assign_null_terminated(capnp_json);
serialized_buffer.assign(capnp_json);
break;
}
case SerializationType::CAPNP: {
Expand Down Expand Up @@ -652,9 +646,7 @@ Status array_open_deserialize(
::capnp::MallocMessageBuilder message_builder;
capnp::ArrayOpen::Builder array_open_builder =
message_builder.initRoot<capnp::ArrayOpen>();
json.decode(
kj::StringPtr(static_cast<const char*>(serialized_buffer.data())),
array_open_builder);
utils::decode_json_message(serialized_buffer, array_open_builder, json);
capnp::ArrayOpen::Reader array_open_reader =
array_open_builder.asReader();
RETURN_NOT_OK(array_open_from_capnp(array_open_reader, array));
Expand Down
Loading

0 comments on commit 414a261

Please sign in to comment.