Skip to content

Commit

Permalink
Add ArraySchemaEvolution::extend_enumeration
Browse files Browse the repository at this point in the history
This commit adds the new ArraySchemaEvolution::extend_enumeration API
for extending enumerations during array schema evolution. Enumerations
passed to this API should come from the result of a call to
Enumeration::extend.
  • Loading branch information
davisp committed Oct 23, 2023
1 parent d2fb8e4 commit 263f57e
Show file tree
Hide file tree
Showing 14 changed files with 714 additions and 53 deletions.
29 changes: 29 additions & 0 deletions test/src/unit-cppapi-enumerations.cc
Original file line number Diff line number Diff line change
Expand Up @@ -345,6 +345,35 @@ TEST_CASE_METHOD(
REQUIRE(rc != TILEDB_OK);
}

// Smoke test for the C++ wrapper: handing an enumeration to
// ArraySchemaEvolution::extend_enumeration is expected not to throw.
TEST_CASE_METHOD(
    CPPEnumerationFx,
    "CPP: ArraySchemaEvolution - Extend Enumeration",
    "[enumeration][array-schema-evolution][extend-enumeration]") {
  ArraySchemaEvolution evolution(ctx_);

  std::vector<std::string> members = {"fred", "wilma", "barney", "pebbles"};
  auto enumeration = Enumeration::create(ctx_, enmr_name, members);

  CHECK_NOTHROW(evolution.extend_enumeration(enumeration));
}


// Exercises the C API entry point for extending an enumeration and checks
// that it reports an error (anything other than TILEDB_OK) when either the
// evolution handle or the enumeration handle is null.
TEST_CASE_METHOD(
    CPPEnumerationFx,
    "C API: ArraySchemaEvolution - Extend Enumeration - Check nullptr",
    "[enumeration][array-schema-evolution][extend-enumeration]") {
  std::vector<std::string> values = {"fred", "wilma", "barney", "pebbles"};
  auto enmr = Enumeration::create(ctx_, enmr_name, values);

  // Null evolution handle, valid enumeration handle.
  auto rc = tiledb_array_schema_evolution_extend_enumeration(
      ctx_.ptr().get(), nullptr, enmr.ptr().get());
  REQUIRE(rc != TILEDB_OK);

  // Valid evolution handle, null enumeration handle.
  ArraySchemaEvolution ase(ctx_);
  rc = tiledb_array_schema_evolution_extend_enumeration(
      ctx_.ptr().get(), ase.ptr().get(), nullptr);
  REQUIRE(rc != TILEDB_OK);
}

TEST_CASE_METHOD(
CPPEnumerationFx,
"CPP: ArraySchemaEvolution - Drop Enumeration",
Expand Down
260 changes: 251 additions & 9 deletions test/src/unit-enumerations.cc
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,12 @@ struct EnumerationFx {
Datatype type = static_cast<Datatype>(255),
std::string enmr_name = default_enmr_name);

// Fixture helper that builds an enumeration of the given datatype and
// cell_val_num without supplying any values. `ordered` defaults to false and
// `enmr_name` defaults to default_enmr_name.
shared_ptr<const Enumeration> create_empty_enumeration(
Datatype type,
uint32_t cell_val_num,
bool ordered = false,
std::string enmr_name = default_enmr_name);

template <typename T>
shared_ptr<const Enumeration> extend_enumeration(
shared_ptr<const Enumeration> enmr, const std::vector<T>& values);
Expand Down Expand Up @@ -712,6 +718,19 @@ TEST_CASE_METHOD(
REQUIRE(enmr2->is_extension_of(enmr1));
}

// Starts from an empty INT32 enumeration, extends it with ten values, and
// checks both the resulting contents and the direction of the
// is_extension_of relationship.
TEST_CASE_METHOD(
    EnumerationFx,
    "Enumeration Extension Empty Fixed Size",
    "[enumeration][extension][fixed]") {
  std::vector<int> added = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10};

  auto empty_enmr = create_empty_enumeration(Datatype::INT32, 1);
  auto extended_enmr = extend_enumeration(empty_enmr, added);

  // The extension must hold exactly the values that were appended.
  check_enumeration(
      extended_enmr, default_enmr_name, added, Datatype::INT32, 1, false);

  // Only the extended enumeration is an extension of the other one.
  REQUIRE(!empty_enmr->is_extension_of(extended_enmr));
  REQUIRE(extended_enmr->is_extension_of(empty_enmr));
}

TEST_CASE_METHOD(
EnumerationFx,
"Enumeration Extension Fixed Size Multi-Cell Value",
Expand Down Expand Up @@ -755,6 +774,25 @@ TEST_CASE_METHOD(
REQUIRE(enmr2->is_extension_of(enmr1));
}

// Var-sized counterpart of the empty-extension test: an empty STRING_ASCII
// enumeration is extended with four strings and then validated.
TEST_CASE_METHOD(
    EnumerationFx,
    "Enumeration Extension Empty Var Size",
    "[enumeration][extension][var-sized]") {
  std::vector<std::string> added = {"fred", "wilma", "barney", "betty"};

  auto empty_enmr =
      create_empty_enumeration(Datatype::STRING_ASCII, constants::var_num);
  auto extended_enmr = extend_enumeration(empty_enmr, added);

  // The extension must hold exactly the values that were appended.
  check_enumeration(
      extended_enmr,
      default_enmr_name,
      added,
      Datatype::STRING_ASCII,
      constants::var_num,
      false);

  // Only the extended enumeration is an extension of the other one.
  REQUIRE(!empty_enmr->is_extension_of(extended_enmr));
  REQUIRE(extended_enmr->is_extension_of(empty_enmr));
}

TEST_CASE_METHOD(
EnumerationFx,
"Enumeration Extension Invalid Data",
Expand Down Expand Up @@ -1411,6 +1449,90 @@ TEST_CASE_METHOD(
REQUIRE_THROWS(schema->drop_enumeration("not_an_enumeration"));
}

// ArraySchema::extend_enumeration is expected to throw with a descriptive
// message when handed a null enumeration.
TEST_CASE_METHOD(
    EnumerationFx,
    "ArraySchema - Extend Enumeration - Enumeration is nullptr",
    "[enumeration][array-schema][error]") {
  create_array();
  auto latest_schema = get_array_schema_latest();

  auto expected_error = Catch::Matchers::ContainsSubstring(
      "Error adding enumeration. Enumeration must not be nullptr.");
  REQUIRE_THROWS_WITH(
      latest_schema->extend_enumeration(nullptr), expected_error);
}

// Extending with an enumeration named 'foo' is expected to fail with a
// "does not exist in this ArraySchema" error.
TEST_CASE_METHOD(
    EnumerationFx,
    "ArraySchema - Extend Enumeration - Enumeration Does Not Exist",
    "[enumeration][array-schema][error]") {
  create_array();
  auto latest_schema = get_array_schema_latest();

  auto unknown_enmr =
      create_empty_enumeration(Datatype::INT32, 1, false, "foo");

  auto expected_error = Catch::Matchers::ContainsSubstring(
      "Enumeration with name 'foo' does not exist in this ArraySchema.");
  REQUIRE_THROWS_WITH(
      latest_schema->extend_enumeration(unknown_enmr), expected_error);
}

// Without calling load_all_enumerations first, extending 'test_enmr' is
// expected to fail with an "is not loaded" error.
TEST_CASE_METHOD(
    EnumerationFx,
    "ArraySchema - Extend Enumeration - Enumeration Not Loaded",
    "[enumeration][array-schema][error]") {
  create_array();
  auto latest_schema = get_array_schema_latest();

  auto candidate =
      create_empty_enumeration(Datatype::INT32, 1, false, "test_enmr");

  auto expected_error = Catch::Matchers::ContainsSubstring(
      "Enumeration with name 'test_enmr' is not loaded.");
  REQUIRE_THROWS_WITH(
      latest_schema->extend_enumeration(candidate), expected_error);
}

// With all enumerations loaded, passing an empty enumeration that reuses the
// name 'test_enmr' is expected to be rejected as "not an extension".
TEST_CASE_METHOD(
    EnumerationFx,
    "ArraySchema - Extend Enumeration - Enumeration Not An Extension",
    "[enumeration][array-schema][error]") {
  create_array();

  auto opened_array = get_array(QueryType::READ);
  opened_array->load_all_enumerations();

  // Build a separate ArraySchema object from the array's latest schema.
  auto schema_copy =
      make_shared<ArraySchema>(HERE(), opened_array->array_schema_latest());
  auto not_an_extension =
      create_empty_enumeration(Datatype::INT32, 1, false, "test_enmr");

  auto expected_error = Catch::Matchers::ContainsSubstring(
      "Provided enumeration is not an extension of the current state of "
      "'test_enmr'");
  REQUIRE_THROWS_WITH(
      schema_copy->extend_enumeration(not_an_extension), expected_error);
}

// Builds a valid extension of 'test_enmr' and then re-creates it with the
// original enumeration's path name, which extend_enumeration is expected to
// reject as an already-existing path name.
TEST_CASE_METHOD(
    EnumerationFx,
    "ArraySchema - Extend Enumeration - Duplicate Enumeration Path Name",
    "[enumeration][array-schema][error]") {
  create_array();

  auto opened_array = get_array(QueryType::READ);
  opened_array->load_all_enumerations();

  auto schema_copy =
      make_shared<ArraySchema>(HERE(), opened_array->array_schema_latest());
  auto original = schema_copy->get_enumeration("test_enmr");

  std::vector<std::string> appended = {"manatee", "narwhal", "oppossum"};
  auto extended = extend_enumeration(original, appended);

  // The colliding path name cannot be produced through the normal extension
  // route, so the enumeration is assembled manually: everything is taken from
  // the extension except the path name, which comes from the original.
  auto clashing = tiledb::sm::Enumeration::create(
      extended->name(),
      original->path_name(),
      extended->type(),
      extended->cell_val_num(),
      extended->ordered(),
      extended->data().data(),
      extended->data().size(),
      extended->offsets().data(),
      extended->offsets().size());

  auto expected_error = Catch::Matchers::ContainsSubstring(
      "Enumeration path name for 'test_enmr' already exists in this schema.");
  REQUIRE_THROWS_WITH(
      schema_copy->extend_enumeration(clashing), expected_error);
}

/* ********************************* */
/* Testing ArraySchemaEvolution */
/* ********************************* */
Expand Down Expand Up @@ -1509,10 +1631,37 @@ TEST_CASE_METHOD(
CHECK_NOTHROW(ase->evolve_schema(orig_schema));
}

// Happy path: an extension of the loaded 'test_enmr' enumeration is
// registered on an ArraySchemaEvolution, and evolving the original schema is
// expected not to throw.
TEST_CASE_METHOD(
    EnumerationFx,
    "ArraySchemaEvolution - Enumeration to Extend",
    "[enumeration][array-schema-evolution][enmr-to-extend]") {
  create_array();

  auto opened_array = get_array(QueryType::READ);
  opened_array->load_all_enumerations();
  auto original_schema = opened_array->array_schema_latest_ptr();

  std::vector<std::string> appended = {"firefly", "gerbil", "hamster"};

  auto current_enmr = original_schema->get_enumeration("test_enmr");
  REQUIRE(current_enmr != nullptr);
  auto extended_enmr = extend_enumeration(current_enmr, appended);

  auto evolution = make_shared<ArraySchemaEvolution>(HERE());
  evolution->extend_enumeration(extended_enmr);
  CHECK_NOTHROW(evolution->evolve_schema(original_schema));
}

// Registering an enumeration name to drop on a freshly constructed
// ArraySchemaEvolution is expected not to throw.
TEST_CASE_METHOD(
    EnumerationFx,
    "ArraySchemaEvolution - Drop Enumeration",
    "[enumeration][array-schema-evolution][enmr-to-drop]") {
  auto ase = make_shared<ArraySchemaEvolution>(HERE());
  CHECK_NOTHROW(ase->drop_enumeration("test_enmr"));
}

TEST_CASE_METHOD(
EnumerationFx,
"ArraySchemaEvolution - Add Then Drop Enumeration",
"[enumeration][array-schema-evolution][enmr-to-drop]") {
create_array();
auto orig_schema = get_array_schema_latest();
auto ase1 = make_shared<ArraySchemaEvolution>(HERE());
Expand All @@ -1529,14 +1678,6 @@ TEST_CASE_METHOD(
CHECK_NOTHROW(ase2->evolve_schema(new_schema));
}

// Registering an enumeration name to drop on a freshly constructed
// ArraySchemaEvolution is expected not to throw.
// NOTE(review): a test case with this same name also appears earlier in this
// listing; presumably only one of the two should remain, since Catch2 does
// not accept duplicate test cases -- confirm.
TEST_CASE_METHOD(
    EnumerationFx,
    "ArraySchemaEvolution - Drop Enumeration",
    "[enumeration][array-schema-evolution][enmr-to-drop]") {
  auto evolution = make_shared<ArraySchemaEvolution>(HERE());

  CHECK_NOTHROW(evolution->drop_enumeration("test_enmr"));
}

TEST_CASE_METHOD(
EnumerationFx,
"ArraySchemaEvolution - Drop Enumeration Repeated",
Expand Down Expand Up @@ -1664,6 +1805,30 @@ TEST_CASE_METHOD(
REQUIRE_THROWS(ase->evolve_schema(orig_schema));
}

// ArraySchemaEvolution::extend_enumeration is expected to throw with a
// descriptive message when the input enumeration is null.
TEST_CASE_METHOD(
    EnumerationFx,
    "ArraySchemaEvolution - Extend Enumeration nullptr",
    "[enumeration][array-schema-evolution][extend][error]") {
  auto evolution = make_shared<ArraySchemaEvolution>(HERE());

  auto expected_error = Catch::Matchers::ContainsSubstring(
      "Cannot extend enumeration; Input enumeration is null");
  REQUIRE_THROWS_WITH(evolution->extend_enumeration(nullptr), expected_error);
}

// The first extend_enumeration call for an enumeration is expected to
// succeed; repeating the call with the same enumeration on the same evolution
// is expected to throw an "already been extended" error.
TEST_CASE_METHOD(
    EnumerationFx,
    "ArraySchemaEvolution - Extend Enumeration Already Extended",
    "[enumeration][array-schema-evolution][extend][error]") {
  auto evolution = make_shared<ArraySchemaEvolution>(HERE());

  std::vector<int> members = {1, 2, 3, 4, 5};
  auto enumeration = create_enumeration(members);

  auto expected_error = Catch::Matchers::ContainsSubstring(
      "Cannot extend enumeration; Input enumeration name has already "
      "been extended in this evolution.");

  // First registration succeeds, the repeat is rejected.
  REQUIRE_NOTHROW(evolution->extend_enumeration(enumeration));
  REQUIRE_THROWS_WITH(
      evolution->extend_enumeration(enumeration), expected_error);
}

/* ********************************* */
/* Testing QueryCondition */
/* ********************************* */
Expand Down Expand Up @@ -1708,6 +1873,45 @@ TEST_CASE_METHOD(
REQUIRE(data2.rvalue_as<int>() == 2);
}

// End-to-end check that a query condition on a value missing from the
// enumeration fails to rewrite at first, and rewrites successfully once the
// enumeration has been extended through schema evolution.
TEST_CASE_METHOD(
    EnumerationFx,
    "QueryCondition - Rewrite Enumeration Value After Extension",
    "[enumeration][query-condition][extend][rewrite-enumeration-value]") {
  create_array();

  auto opened_array = get_array(QueryType::READ);
  opened_array->load_all_enumerations();
  auto current_schema = opened_array->array_schema_latest_ptr();

  // Two identical conditions: one is used for the failing rewrite, the other
  // for the rewrite after the extension.
  auto qc_before =
      create_qc("attr1", std::string("gerbil"), QueryConditionOp::EQ);
  auto qc_after = qc_before;

  // Before the extension the rewrite is expected to fail.
  auto expected_error = Catch::Matchers::ContainsSubstring(
      "Enumeration value not found for field 'attr1'");
  REQUIRE_THROWS_WITH(
      qc_before.rewrite_enumeration_conditions(*current_schema),
      expected_error);

  // Extend the enumeration and apply the change via schema evolution.
  std::vector<std::string> appended = {"firefly", "gerbil", "hamster"};
  auto current_enmr = current_schema->get_enumeration("test_enmr");
  auto extended_enmr = extend_enumeration(current_enmr, appended);

  auto evolution = make_shared<ArraySchemaEvolution>(HERE());
  evolution->extend_enumeration(extended_enmr);
  throw_if_not_ok(ctx_.storage_manager()->array_evolve_schema(
      opened_array->array_uri(),
      evolution.get(),
      opened_array->get_encryption_key()));

  // Re-open the array and check that the query condition can now be
  // rewritten.
  opened_array = get_array(QueryType::READ);
  opened_array->load_all_enumerations();
  current_schema = opened_array->array_schema_latest_ptr();

  REQUIRE_NOTHROW(qc_after.rewrite_enumeration_conditions(*current_schema));
}

TEST_CASE_METHOD(
EnumerationFx,
"QueryCondition - Skip enumeration rewrite",
Expand Down Expand Up @@ -1926,6 +2130,7 @@ TEST_CASE_METHOD(

auto enmrs_to_add1 = ase1.enumeration_names_to_add();
auto enmrs_to_add2 = ase2->enumeration_names_to_add();
REQUIRE(enmrs_to_add1.size() == 2);
REQUIRE(vec_cmp(enmrs_to_add1, enmrs_to_add2));

for (auto& name : enmrs_to_add1) {
Expand All @@ -1935,6 +2140,37 @@ TEST_CASE_METHOD(
}
}

// Round-trips an ArraySchemaEvolution carrying two enumerations-to-extend
// through serialization (every combination of client/server side and
// CAPNP/JSON) and checks that the extension names survive and that the
// deserialized enumerations are distinct objects.
TEST_CASE_METHOD(
    EnumerationFx,
    "Cap'N Proto - ArraySchemaEvolution Serialization With Extensions",
    "[enumeration][capnp][basic][array-schema-evolution]") {
  auto client_side = GENERATE(true, false);
  auto ser_type = GENERATE(SerializationType::CAPNP, SerializationType::JSON);

  std::vector<int> int_values = {1, 2, 3, 4, 5};
  auto int_enmr =
      create_enumeration(int_values, false, Datatype::INT32, "enmr1");

  std::vector<double> double_values = {1.0, 2.0, 3.0, 4.0, 5.0};
  auto double_enmr =
      create_enumeration(double_values, true, Datatype::FLOAT64, "enmr2");

  ArraySchemaEvolution original;
  original.extend_enumeration(int_enmr);
  original.extend_enumeration(double_enmr);

  auto round_tripped =
      ser_des_array_schema_evolution(&original, client_side, ser_type);

  auto names_before = original.enumeration_names_to_extend();
  auto names_after = round_tripped->enumeration_names_to_extend();
  REQUIRE(names_after.size() == 2);
  REQUIRE(vec_cmp(names_before, names_after));

  // Both sides must resolve every name, and to different pointers.
  for (auto& enmr_name : names_before) {
    REQUIRE(original.enumeration_to_extend(enmr_name) != nullptr);
    REQUIRE(round_tripped->enumeration_to_extend(enmr_name) != nullptr);
    REQUIRE(
        original.enumeration_to_extend(enmr_name) !=
        round_tripped->enumeration_to_extend(enmr_name));
  }
}

TEST_CASE_METHOD(
EnumerationFx,
"Cap'N Proto - Basic Backwards Compatible Query Serialization",
Expand Down Expand Up @@ -2188,6 +2424,12 @@ shared_ptr<const Enumeration> EnumerationFx::create_enumeration(
}
}

// Builds an enumeration with the requested name, datatype, cell_val_num and
// ordering, passing null data/offsets pointers with zero sizes to
// Enumeration::create.
shared_ptr<const Enumeration> EnumerationFx::create_empty_enumeration(
    Datatype type, uint32_t cell_val_num, bool ordered, std::string name) {
  shared_ptr<const Enumeration> empty_enmr = Enumeration::create(
      name, type, cell_val_num, ordered, nullptr, 0, nullptr, 0);
  return empty_enmr;
}

template <typename T>
shared_ptr<const Enumeration> EnumerationFx::extend_enumeration(
shared_ptr<const Enumeration> enmr, const std::vector<T>& values) {
Expand Down
Loading

0 comments on commit 263f57e

Please sign in to comment.