Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement dense dimension aggregates. #4801

Merged
merged 2 commits into from
Mar 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
716 changes: 329 additions & 387 deletions test/src/test-cppapi-aggregates.cc

Large diffs are not rendered by default.

19 changes: 19 additions & 0 deletions tiledb/api/c_api/query_aggregate/query_aggregate_api.cc
Original file line number Diff line number Diff line change
Expand Up @@ -187,10 +187,29 @@ capi_return_t tiledb_create_unary_aggregate(
tiledb_query_field_handle_t::make_handle(query, input_field_name);
tiledb_query_field_handle_t::break_handle(field);

// True only when the array is dense and the aggregated field is one of its
// dimensions.
const auto is_dense_dim{schema.dense() && schema.is_dim(field_name)};
const auto cell_order{schema.cell_order()};

// Get the dimension index for the dense case. It is used below to know if the
// dimension to be aggregated is the last dimension for ROW_MAJOR or first for
// COL_MAJOR. This is used at the aggregate level to know if we need to change
// the dimension value when we move cells.
unsigned dim_idx = 0;
if (is_dense_dim) {
  dim_idx = schema.domain().get_dimension_index(field_name);
}

// The conditional expression must be parenthesized: `&&` binds tighter than
// `?:`, so without the parentheses a field that is not a dense dimension
// would fall into the `dim_idx == 0` branch and be reported as the slab
// dimension.
const bool is_slab_dim =
    is_dense_dim && ((cell_order == sm::Layout::ROW_MAJOR) ?
                         (dim_idx == schema.dim_num() - 1) :
                         (dim_idx == 0));

auto fi = tiledb::sm::FieldInfo(
field_name,
schema.var_size(field_name),
schema.is_nullable(field_name),
is_dense_dim,
is_slab_dim,
schema.cell_val_num(field_name),
schema.type(field_name));

Expand Down
8 changes: 3 additions & 5 deletions tiledb/sm/array_schema/domain.cc
Original file line number Diff line number Diff line change
Expand Up @@ -532,16 +532,14 @@ Status Domain::has_dimension(const std::string& name, bool* has_dim) const {
return Status::Ok();
}

Status Domain::get_dimension_index(
const std::string& name, unsigned* dim_idx) const {
unsigned Domain::get_dimension_index(const std::string& name) const {
for (unsigned d = 0; d < dim_num_; ++d) {
if (dimension_ptrs_[d]->name() == name) {
*dim_idx = d;
return Status::Ok();
return d;
}
}

return Status_DomainError(
throw std::invalid_argument(
"Cannot get dimension index; Invalid dimension name");
}

Expand Down
5 changes: 2 additions & 3 deletions tiledb/sm/array_schema/domain.h
Original file line number Diff line number Diff line change
Expand Up @@ -403,10 +403,9 @@ class Domain {
* Gets the index in the domain of a given dimension name
*
* @param name Name of dimension to check for
* @param dim_idx The index of this dimension in the domain
* @return Status
* @return Dimension index
*/
Status get_dimension_index(const std::string& name, unsigned* dim_idx) const;
unsigned get_dimension_index(const std::string& name) const;

/** Returns true if at least one dimension has null tile extent. */
bool null_tile_extents() const;
Expand Down
3 changes: 1 addition & 2 deletions tiledb/sm/fragment/fragment_metadata.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1971,8 +1971,7 @@ TileMetadata FragmentMetadata::get_tile_metadata(
unsigned dim_idx = 0;
const NDRange* mbr = nullptr;
if (is_dim) {
throw_if_not_ok(
array_schema_->domain().get_dimension_index(name, &dim_idx));
dim_idx = array_schema_->domain().get_dimension_index(name);
mbr = &rtree_.leaf(tile_idx);
}

Expand Down
39 changes: 37 additions & 2 deletions tiledb/sm/query/readers/aggregators/aggregate_with_count.h
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,17 @@ class AggregateWithCount {
// nullable. The bitmap tells us which cells was already filtered out by
// ranges or query conditions.
if (input_data.has_bitmap()) {
if (field_info_.is_nullable_) {
if (field_info_.is_dense_dim_) {
// Process for dense dimension values with bitmap.
for (uint64_t c = 0; c < input_data.size(); c++) {
auto bitmap_val = input_data.bitmap_at<BITMAP_T>(c);
auto value = dense_dim_value_at(input_data, c);
for (BITMAP_T i = 0; i < bitmap_val; i++) {
agg_policy.op(value, res, count);
count++;
}
}
} else if (field_info_.is_nullable_) {
// Process for nullable values with bitmap.
for (uint64_t c = 0; c < input_data.size(); c++) {
auto bitmap_val = input_data.bitmap_at<BITMAP_T>(c);
Expand All @@ -117,7 +127,14 @@ class AggregateWithCount {
}
}
} else {
if (field_info_.is_nullable_) {
if (field_info_.is_dense_dim_) {
// Process for dense dimension values with no bitmap.
for (uint64_t c = 0; c < input_data.size(); c++) {
auto value = dense_dim_value_at(input_data, c);
agg_policy.op(value, res, count);
count++;
}
} else if (field_info_.is_nullable_) {
// Process for nullable values with no bitmap.
for (uint64_t c = 0; c < input_data.size(); c++) {
if (val_policy.op(input_data.validity_at(c))) {
Expand Down Expand Up @@ -166,6 +183,24 @@ class AggregateWithCount {

return AGG_T();
}

/**
 * Computes the value of a dense dimension for one cell of the input buffer.
 *
 * The result is based on the first value stored in the buffer: when the field
 * is flagged as the slab dimension, the cell index is added to that value;
 * otherwise the first value is used unchanged for every cell. When the
 * aggregation policy is `NoOp` or `AGG_T` is `std::string_view`, a
 * value-initialized `AGG_T` is returned instead.
 *
 * @param input_data Input data.
 * @param c Cell index.
 * @return Value.
 */
inline AGG_T dense_dim_value_at(AggregateBuffer& input_data, uint64_t c) {
  using VALUE_T = typename type_data<T>::value_type;
  constexpr bool computes_value = !(
      std::is_same_v<AggPolicy, NoOp> ||
      std::is_same_v<AGG_T, std::string_view>);

  if constexpr (computes_value) {
    // Only the slab dimension advances as we move from cell to cell.
    const uint64_t offset = field_info_.is_slab_dim_ ? c : 0;
    return input_data.value_at<VALUE_T>(0) + offset;
  } else {
    return AGG_T();
  }
}
};

} // namespace tiledb::sm
Expand Down
37 changes: 37 additions & 0 deletions tiledb/sm/query/readers/aggregators/field_info.h
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,8 @@ class FieldInfo {
// Default constructor: clears every boolean flag, sets the cell val num to 1
// and the datatype to UINT8.
FieldInfo()
: var_sized_(false)
, is_nullable_(false)
// Dense dimension flags are off by default.
, is_dense_dim_(false)
, is_slab_dim_(false)
, cell_val_num_(1)
, type_(Datatype::UINT8){};

Expand All @@ -71,6 +73,35 @@ class FieldInfo {
: name_(name)
, var_sized_(var_sized)
, is_nullable_(is_nullable)
, is_dense_dim_(false)
, is_slab_dim_(false)
, cell_val_num_(cell_val_num)
, type_(type){};

/**
 * Constructor.
 *
 * @param name Name of the field.
 * @param var_sized Is the field var sized?
 * @param is_nullable Is the field nullable?
 * @param is_dense_dim Is the field a dense dimension?
 * @param is_slab_dim Is the dense dimension the cell slab dimension?
 * @param cell_val_num Cell val num.
 * @param type Data type of the field.
 */
FieldInfo(
    // Taken by const reference: a by-value const parameter is copied once
    // on the call and again into `name_`, and cannot be moved from.
    const std::string& name,
    const bool var_sized,
    const bool is_nullable,
    const bool is_dense_dim,
    const bool is_slab_dim,
    const unsigned cell_val_num,
    const Datatype type)
    : name_(name)
    , var_sized_(var_sized)
    , is_nullable_(is_nullable)
    , is_dense_dim_(is_dense_dim)
    , is_slab_dim_(is_slab_dim)
    , cell_val_num_(cell_val_num)
    , type_(type) {
}

Expand All @@ -87,6 +118,12 @@ class FieldInfo {
/** Is the field nullable? */
const bool is_nullable_;

/** Is the field a dense dimension? */
const bool is_dense_dim_;

/** Is the dense dimension the cell slab dimension? */
const bool is_slab_dim_;

/** Cell val num. */
const unsigned cell_val_num_;

Expand Down
Loading
Loading