Skip to content

Commit

Permalink
[Backport release-2.26] Add serialization and API changes for post_ar…
Browse files Browse the repository at this point in the history
…ray_schema_from_rest. (#5237) (#5261)

Backport of #5237 to release-2.26

---
TYPE: IMPROVEMENT
DESC: Add serialization and API changes for post_array_schema_from_rest.
  • Loading branch information
shaunrd0 authored Aug 28, 2024
1 parent 4487393 commit 6460a8b
Show file tree
Hide file tree
Showing 25 changed files with 646 additions and 125 deletions.
2 changes: 1 addition & 1 deletion test/src/unit-capi-config.cc
Original file line number Diff line number Diff line change
Expand Up @@ -230,7 +230,7 @@ void check_save_to_file() {
ss << "rest.curl.buffer_size 524288\n";
ss << "rest.curl.verbose false\n";
ss << "rest.http_compressor any\n";
ss << "rest.load_enumerations_on_array_open true\n";
ss << "rest.load_enumerations_on_array_open false\n";
ss << "rest.load_metadata_on_array_open true\n";
ss << "rest.load_non_empty_domain_on_array_open true\n";
ss << "rest.retry_count 25\n";
Expand Down
63 changes: 63 additions & 0 deletions test/src/unit-enumerations.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1119,6 +1119,69 @@ TEST_CASE_METHOD(
REQUIRE(schema->is_enumeration_loaded("test_enmr") == true);
}

// Verifies that array->load_all_enumerations() marks enumerations as loaded on
// every schema returned by array_schemas_all(), not only on the latest one,
// while the array is evolved twice: first adding an enumeration, then dropping
// one.
TEST_CASE_METHOD(
EnumerationFx,
"Array - Load All Enumerations - All Schemas",
"[enumeration][array][load-all-enumerations][all-schemas]") {
create_array();
auto array = get_array(QueryType::READ);
auto schema = array->array_schema_latest_ptr();
// Nothing has been loaded explicitly yet, so "test_enmr" must be unloaded.
REQUIRE(schema->is_enumeration_loaded("test_enmr") == false);
// Remember the first schema's name so it can be looked up after evolutions.
std::string schema_name_1 = schema->name();

// Evolve once to add an enumeration.
auto ase = make_shared<ArraySchemaEvolution>(HERE(), memory_tracker_);
std::vector<std::string> var_values{"one", "two", "three"};
auto var_enmr = create_enumeration(
var_values, false, Datatype::STRING_ASCII, "ase_var_enmr");
ase->add_enumeration(var_enmr);
// NOTE(review): attr4 is created and bound to "ase_var_enmr" but is never
// handed to `ase` in this test, so only the enumeration is added by this
// evolution. Confirm whether ase->add_attribute(attr4) was intended.
auto attr4 = make_shared<Attribute>(HERE(), "attr4", Datatype::UINT16);
attr4->set_enumeration_name("ase_var_enmr");
// Only checks that evolving the current schema does not throw; the schema
// returned by evolve_schema() is discarded here.
CHECK_NOTHROW(ase->evolve_schema(schema));
// Apply evolution to the array and reopen.
CHECK_NOTHROW(Array::evolve_array_schema(
ctx_.resources(), uri_, ase.get(), array->get_encryption_key()));
CHECK(array->reopen().ok());
CHECK_NOTHROW(array->load_all_enumerations());
auto all_schemas = array->array_schemas_all();
schema = array->array_schema_latest_ptr();
// Name of the latest schema after the first evolution.
std::string schema_name_2 = schema->name();

// Check all schemas.
CHECK(all_schemas[schema_name_1]->is_enumeration_loaded("test_enmr") == true);
CHECK(all_schemas[schema_name_2]->is_enumeration_loaded("test_enmr") == true);
CHECK(
all_schemas[schema_name_2]->is_enumeration_loaded("ase_var_enmr") ==
true);

// Evolve a second time to drop an enumeration.
ase = make_shared<ArraySchemaEvolution>(HERE(), memory_tracker_);
ase->drop_enumeration("test_enmr");
// "attr1" is dropped together with the enumeration; presumably it is the
// fixture attribute that references "test_enmr" (verify against the fixture).
ase->drop_attribute("attr1");
CHECK_NOTHROW(ase->evolve_schema(schema));
// Apply evolution to the array and reopen.
CHECK_NOTHROW(Array::evolve_array_schema(
ctx_.resources(), uri_, ase.get(), array->get_encryption_key()));
CHECK(array->reopen().ok());
CHECK_NOTHROW(array->load_all_enumerations());
all_schemas = array->array_schemas_all();
schema = array->array_schema_latest_ptr();
// Name of the latest schema after the second evolution.
std::string schema_name_3 = schema->name();

// Check all schemas.
// The two older schemas still carry "test_enmr"; the newest one no longer has
// it, so asking whether it is loaded is expected to throw.
CHECK(all_schemas[schema_name_1]->is_enumeration_loaded("test_enmr") == true);
CHECK(all_schemas[schema_name_2]->is_enumeration_loaded("test_enmr") == true);
CHECK(
all_schemas[schema_name_2]->is_enumeration_loaded("ase_var_enmr") ==
true);
CHECK_THROWS_WITH(
all_schemas[schema_name_3]->is_enumeration_loaded("test_enmr"),
Catch::Matchers::ContainsSubstring("No enumeration named"));
CHECK(
all_schemas[schema_name_3]->is_enumeration_loaded("ase_var_enmr") ==
true);
}

TEST_CASE_METHOD(
EnumerationFx,
"Array - Load All Enumerations - Repeated",
Expand Down
114 changes: 100 additions & 14 deletions test/src/unit-request-handlers.cc
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@

#ifdef TILEDB_SERIALIZATION

#include "test/support/src/helpers.h"
#include "test/support/src/mem_helpers.h"
#include "test/support/tdb_catch.h"
#include "tiledb/api/c_api/buffer/buffer_api_internal.h"
Expand All @@ -41,6 +42,7 @@
#include "tiledb/sm/c_api/tiledb_serialization.h"
#include "tiledb/sm/c_api/tiledb_struct_def.h"
#include "tiledb/sm/cpp_api/tiledb"
#include "tiledb/sm/cpp_api/tiledb_experimental"
#include "tiledb/sm/crypto/encryption_key.h"
#include "tiledb/sm/enums/array_type.h"
#include "tiledb/sm/enums/encryption_type.h"
Expand All @@ -67,6 +69,7 @@ struct RequestHandlerFx {
Config cfg_;
Context ctx_;
EncryptionKey enc_key_;
shared_ptr<ArraySchema> schema_;
};

struct HandleLoadArraySchemaRequestFx : RequestHandlerFx {
Expand All @@ -75,11 +78,17 @@ struct HandleLoadArraySchemaRequestFx : RequestHandlerFx {
}

virtual shared_ptr<ArraySchema> create_schema() override;
shared_ptr<ArraySchema> call_handler(

std::tuple<
shared_ptr<ArraySchema>,
std::unordered_map<std::string, shared_ptr<ArraySchema>>>
call_handler(
serialization::LoadArraySchemaRequest req, SerializationType stype);

shared_ptr<const Enumeration> create_string_enumeration(
std::string name, std::vector<std::string>& values);

shared_ptr<ArraySchema> schema_add_attribute(const std::string& attr_name);
};

struct HandleQueryPlanRequestFx : RequestHandlerFx {
Expand Down Expand Up @@ -116,15 +125,23 @@ struct HandleConsolidationPlanRequestFx : RequestHandlerFx {

TEST_CASE_METHOD(
HandleLoadArraySchemaRequestFx,
"tiledb_handle_load_array_schema_request - default request",
"tiledb_handle_load_array_schema_request - no enumerations",
"[request_handler][load_array_schema][default]") {
auto stype = GENERATE(SerializationType::JSON, SerializationType::CAPNP);

create_array();
auto schema =
call_handler(serialization::LoadArraySchemaRequest(false), stype);
auto schema_response =
call_handler(serialization::LoadArraySchemaRequest(cfg_), stype);
auto schema = std::get<0>(schema_response);
REQUIRE(schema->has_enumeration("enmr"));
REQUIRE(schema->get_loaded_enumeration_names().size() == 0);
tiledb::test::schema_equiv(*schema, *schema_);

// We did not evolve the schema so there should only be one.
auto all_schemas = std::get<1>(schema_response);
REQUIRE(all_schemas.size() == 1);
tiledb::test::schema_equiv(
*all_schemas.find(schema->name())->second, *schema_);
}

TEST_CASE_METHOD(
Expand All @@ -134,12 +151,57 @@ TEST_CASE_METHOD(
auto stype = GENERATE(SerializationType::JSON, SerializationType::CAPNP);

create_array();
auto schema =
call_handler(serialization::LoadArraySchemaRequest(true), stype);
REQUIRE(cfg_.set("rest.load_enumerations_on_array_open", "true").ok());
auto schema_response =
call_handler(serialization::LoadArraySchemaRequest(cfg_), stype);
auto schema = std::get<0>(schema_response);
REQUIRE(schema->has_enumeration("enmr"));
REQUIRE(schema->get_loaded_enumeration_names().size() == 1);
REQUIRE(schema->get_loaded_enumeration_names()[0] == "enmr");
REQUIRE(schema->get_enumeration("enmr") != nullptr);
tiledb::test::schema_equiv(*schema, *schema_);

// We did not evolve the schema so there should only be one.
auto all_schemas = std::get<1>(schema_response);
REQUIRE(all_schemas.size() == 1);
tiledb::test::schema_equiv(
*all_schemas.find(schema->name())->second, *schema_);
}

// Evolves the array three times and verifies that the load-array-schema
// handler returns the latest schema plus every schema version, for both
// serialization formats and with enumeration loading switched on and off.
TEST_CASE_METHOD(
    HandleLoadArraySchemaRequestFx,
    "tiledb_handle_load_array_schema_request - multiple schemas",
    "[request_handler][load_array_schema][schema-evolution]") {
  auto stype = GENERATE(SerializationType::JSON, SerializationType::CAPNP);
  std::string load_enums = GENERATE("true", "false");

  create_array();

  // Expected schemas, oldest first: the initial schema followed by one entry
  // per applied evolution.
  std::vector<shared_ptr<ArraySchema>> all_schemas{schema_};
  all_schemas.push_back(schema_add_attribute("b"));
  all_schemas.push_back(schema_add_attribute("c"));
  all_schemas.push_back(schema_add_attribute("d"));

  REQUIRE(cfg_.set("rest.load_enumerations_on_array_open", load_enums).ok());
  auto schema_response =
      call_handler(serialization::LoadArraySchemaRequest(cfg_), stype);
  auto schema = std::get<0>(schema_response);
  if (load_enums == "true") {
    REQUIRE(schema->has_enumeration("enmr"));
    REQUIRE(schema->get_loaded_enumeration_names().size() == 1);
    REQUIRE(schema->get_loaded_enumeration_names()[0] == "enmr");
    REQUIRE(schema->get_enumeration("enmr") != nullptr);
  }
  // The latest schema should be equal to the last applied evolution.
  tiledb::test::schema_equiv(*schema, *all_schemas.back());

  // Validate schemas returned from the request in the order they were created.
  // Copying into a std::map iterates the response by schema name; this relies
  // on schema names sorting in creation order (presumably because they are
  // timestamp-prefixed -- confirm against ArraySchema naming).
  auto r_all_schemas = std::get<1>(schema_response);
  std::map<std::string, shared_ptr<ArraySchema>> resp(
      r_all_schemas.begin(), r_all_schemas.end());
  // The loop below indexes the expected list once per returned schema, so the
  // counts must match: extra schemas would index out of bounds and missing
  // ones would otherwise go unnoticed.
  REQUIRE(resp.size() == all_schemas.size());
  for (size_t i = 0; const auto& s : resp) {
    tiledb::test::schema_equiv(*s.second, *all_schemas[i++]);
  }
}

TEST_CASE_METHOD(
Expand Down Expand Up @@ -346,7 +408,9 @@ TEST_CASE_METHOD(
RequestHandlerFx::RequestHandlerFx(const std::string uri)
: memory_tracker_(tiledb::test::create_test_memory_tracker())
, uri_(uri)
, ctx_(cfg_) {
, ctx_(cfg_)
, schema_(make_shared<ArraySchema>(
ArrayType::DENSE, ctx_.resources().ephemeral_memory_tracker())) {
delete_array();
throw_if_not_ok(enc_key_.set_key(EncryptionType::NO_ENCRYPTION, nullptr, 0));
}
Expand Down Expand Up @@ -405,9 +469,28 @@ HandleLoadArraySchemaRequestFx::create_string_enumeration(
tiledb::test::create_test_memory_tracker());
}

/**
 * Evolves the fixture's array by adding a single int32 attribute.
 *
 * The evolution is described with `tiledb::` API objects, applied to the
 * `schema_` member and then applied to the array stored at `uri_`.
 *
 * @param attr_name Name of the attribute to add.
 * @return The evolved schema, which is also stored in `schema_`.
 */
shared_ptr<ArraySchema> HandleLoadArraySchemaRequestFx::schema_add_attribute(
    const std::string& attr_name) {
  // Describe the evolution: one new int32 attribute.
  tiledb::Context api_ctx;
  tiledb::ArraySchemaEvolution evolution(api_ctx);
  auto new_attr = tiledb::Attribute::create<int32_t>(api_ctx, attr_name);
  evolution.add_attribute(new_attr);

  // Evolution object held inside the API handle; used for both steps below.
  auto internal_evolution = evolution.ptr()->array_schema_evolution_;

  // Keep the expected schema member in sync with the evolution first...
  schema_ = internal_evolution->evolve_schema(schema_);

  // ...then apply the very same evolution to the array itself.
  Array::evolve_array_schema(
      ctx_.resources(), uri_, internal_evolution, enc_key_);

  // Callers validate handler responses against this evolved schema.
  return schema_;
}

shared_ptr<ArraySchema> HandleLoadArraySchemaRequestFx::create_schema() {
// Create a schema to serialize
auto schema =
schema_ =
make_shared<ArraySchema>(HERE(), ArrayType::SPARSE, memory_tracker_);
auto dim =
make_shared<Dimension>(HERE(), "dim1", Datatype::INT32, memory_tracker_);
Expand All @@ -416,20 +499,23 @@ shared_ptr<ArraySchema> HandleLoadArraySchemaRequestFx::create_schema() {

auto dom = make_shared<Domain>(HERE(), memory_tracker_);
throw_if_not_ok(dom->add_dimension(dim));
throw_if_not_ok(schema->set_domain(dom));
throw_if_not_ok(schema_->set_domain(dom));

std::vector<std::string> values = {"pig", "cow", "chicken", "dog", "cat"};
auto enmr = create_string_enumeration("enmr", values);
schema->add_enumeration(enmr);
schema_->add_enumeration(enmr);

auto attr = make_shared<Attribute>(HERE(), "attr", Datatype::INT32);
attr->set_enumeration_name("enmr");
throw_if_not_ok(schema->add_attribute(attr));
throw_if_not_ok(schema_->add_attribute(attr));

return schema;
return schema_;
}

shared_ptr<ArraySchema> HandleLoadArraySchemaRequestFx::call_handler(
std::tuple<
shared_ptr<ArraySchema>,
std::unordered_map<std::string, shared_ptr<ArraySchema>>>
HandleLoadArraySchemaRequestFx::call_handler(
serialization::LoadArraySchemaRequest req, SerializationType stype) {
// If this looks weird, it's because we're using the public C++ API to create
// these objects instead of the internal APIs elsewhere in this test suite.
Expand All @@ -451,7 +537,7 @@ shared_ptr<ArraySchema> HandleLoadArraySchemaRequestFx::call_handler(
REQUIRE(rval == TILEDB_OK);

return serialization::deserialize_load_array_schema_response(
stype, resp_buf->buffer(), memory_tracker_);
uri_, stype, resp_buf->buffer(), memory_tracker_);
}

shared_ptr<ArraySchema> HandleQueryPlanRequestFx::create_schema() {
Expand Down
20 changes: 20 additions & 0 deletions test/support/src/helpers.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1621,6 +1621,26 @@ void read_sparse_v11(
tiledb_query_free(&query);
}

/**
 * Asserts, via Catch2 macros, that two array schemas are equivalent.
 *
 * Compares the array type, the attributes (count and, index by index, cell
 * val num, name, type, nullability and enumeration name), capacity, cell
 * order, tile order, the allows-dups flag and the array URI.
 *
 * @param schema1 First schema to compare.
 * @param schema2 Second schema to compare.
 */
void schema_equiv(
    const sm::ArraySchema& schema1, const sm::ArraySchema& schema2) {
  CHECK(schema1.array_type() == schema2.array_type());
  // This must be fatal: the loop below indexes both schemas up to schema2's
  // attribute count, so continuing after a count mismatch would access an
  // attribute that one of the schemas does not have.
  REQUIRE(schema1.attributes().size() == schema2.attributes().size());
  for (unsigned int i = 0; i < schema2.attribute_num(); i++) {
    auto a = schema1.attribute(i);
    auto b = schema2.attribute(i);
    CHECK(a->cell_val_num() == b->cell_val_num());
    CHECK(a->name() == b->name());
    CHECK(a->type() == b->type());
    CHECK(a->nullable() == b->nullable());
    CHECK(a->get_enumeration_name() == b->get_enumeration_name());
  }
  CHECK(schema1.capacity() == schema2.capacity());
  CHECK(schema1.cell_order() == schema2.cell_order());
  CHECK(schema1.tile_order() == schema2.tile_order());
  CHECK(schema1.allows_dups() == schema2.allows_dups());
  CHECK(schema1.array_uri().to_string() == schema2.array_uri().to_string());
}

template void check_subarray<int8_t>(
tiledb::sm::Subarray& subarray, const SubarrayRanges<int8_t>& ranges);
template void check_subarray<uint8_t>(
Expand Down
9 changes: 9 additions & 0 deletions test/support/src/helpers.h
Original file line number Diff line number Diff line change
Expand Up @@ -957,6 +957,15 @@ void write_sparse_v11(
*/
void read_sparse_v11(
tiledb_ctx_t* ctx, const std::string& array_name, uint64_t timestamp);

/**
* Helper function to test two array schemas are equivalent.
*
* Compares the array type, the attributes (count and, index by index, cell
* val num, name, type, nullability and enumeration name), capacity, cell
* order, tile order, the allows-dups flag and the array URI. Mismatches are
* reported through Catch2 assertion macros. The domain/dimensions are not
* among the compared properties.
*
* Every comparison is a plain equality, so swapping the two arguments does
* not change the outcome.
*
* @param schema1 Expected array schema.
* @param schema2 Actual array schema.
*/
void schema_equiv(
const sm::ArraySchema& schema1, const sm::ArraySchema& schema2);
} // namespace tiledb::test

#endif
6 changes: 5 additions & 1 deletion tiledb/api/c_api/config/config_api_external.h
Original file line number Diff line number Diff line change
Expand Up @@ -740,13 +740,17 @@ TILEDB_EXPORT void tiledb_config_free(tiledb_config_t** config) TILEDB_NOEXCEPT;
* If true, array non empty domain will be loaded and sent to server together
* with the open array <br>
* **Default**: true
* - `rest.load_enumerations_on_array_open` <br>
* If true, enumerations will be loaded and sent to server together with
* the open array <br>
* **Default**: false
* - `rest.use_refactored_array_open` <br>
* If true, the new REST routes and APIs for opening an array
* will be used <br>
* **Default**: true
* - `rest.use_refactored_array_open_and_query_submit` <br>
* If true, the new REST routes and APIs for opening an array and submitting
* a query will be used <br>
* a query will be used <br>
* **Default**: true
* - `rest.curl.buffer_size` <br>
* Set curl buffer size for REST requests <br>
Expand Down
Loading

0 comments on commit 6460a8b

Please sign in to comment.