Skip to content

Commit

Permalink
remove dead code related to max buffer sizes
Browse files Browse the repository at this point in the history
  • Loading branch information
robertbindar committed Sep 9, 2024
1 parent ef7aba6 commit c857b28
Show file tree
Hide file tree
Showing 11 changed files with 7,485 additions and 11,565 deletions.
262 changes: 0 additions & 262 deletions tiledb/sm/array/array.cc
Original file line number Diff line number Diff line change
Expand Up @@ -919,131 +919,6 @@ QueryType Array::get_query_type() const {
return query_type_;
}

/**
 * Retrieves the maximum buffer size for a fixed-sized attribute/dimension
 * (or the special coordinates name) over the given subarray.
 *
 * An error status is returned when the array is not open, was not opened in
 * read mode, `name` is null, the domain is heterogeneous or has var-sized
 * dimensions, `name` is unknown to the latest schema, or `name` is
 * var-sized.
 *
 * @param name The attribute/dimension name.
 * @param subarray The subarray to compute the size for.
 * @param buffer_size Set to the computed size on success.
 * @return Status
 */
Status Array::get_max_buffer_size(
    const char* name, const void* subarray, uint64_t* buffer_size) {
  // Check if array is open
  if (!is_open_) {
    return LOG_STATUS(
        Status_ArrayError("Cannot get max buffer size; Array is not open"));
  }

  // Error if the array was not opened in read mode
  if (query_type_ != QueryType::READ) {
    return LOG_STATUS(
        Status_ArrayError("Cannot get max buffer size; "
                          "Array was not opened in read mode"));
  }

  // Check if name is null
  if (name == nullptr) {
    return LOG_STATUS(Status_ArrayError(
        "Cannot get max buffer size; Attribute/Dimension name is null"));
  }

  // Not applicable to heterogeneous domains
  if (!array_schema_latest().domain().all_dims_same_type()) {
    return LOG_STATUS(
        Status_ArrayError("Cannot get max buffer size; Function not "
                          "applicable to heterogeneous domains"));
  }

  // Not applicable to variable-sized dimensions
  if (!array_schema_latest().domain().all_dims_fixed()) {
    return LOG_STATUS(Status_ArrayError(
        "Cannot get max buffer size; Function not "
        "applicable to domains with variable-sized dimensions"));
  }

  // Check if name is attribute or dimension
  bool is_dim = array_schema_latest().is_dim(name);
  bool is_attr = array_schema_latest().is_attr(name);

  // Check if attribute/dimension exists
  if (name != constants::coords && !is_dim && !is_attr) {
    return LOG_STATUS(Status_ArrayError(
        std::string("Cannot get max buffer size; Attribute/Dimension '") +
        name + "' does not exist"));
  }

  // Check if attribute/dimension is fixed sized
  if (array_schema_latest().var_size(name)) {
    return LOG_STATUS(Status_ArrayError(
        std::string("Cannot get max buffer size; Attribute/Dimension '") +
        name + "' is var-sized"));
  }

  RETURN_NOT_OK(compute_max_buffer_sizes(subarray));

  // Retrieve buffer size. The lookup is checked at runtime rather than only
  // asserted, so a missing entry yields an error status even when assertions
  // are compiled out; this matches the var-sized overload.
  auto it = last_max_buffer_sizes_.find(name);
  if (it == last_max_buffer_sizes_.end()) {
    return LOG_STATUS(Status_ArrayError(
        std::string("Cannot get max buffer size; Attribute/Dimension '") +
        name + "' does not exist"));
  }
  *buffer_size = it->second.first;

  return Status::Ok();
}

/**
 * Retrieves the maximum offsets/values buffer sizes for a var-sized
 * attribute/dimension over the given subarray.
 *
 * An error status is returned when the array is not open, was not opened in
 * read mode, `name` is null, the domain is heterogeneous or has var-sized
 * dimensions, the size computation fails, `name` has no computed entry, or
 * `name` is fixed-sized.
 *
 * @param name The attribute/dimension name.
 * @param subarray The subarray to compute the sizes for.
 * @param buffer_off_size Set to the offsets buffer size on success.
 * @param buffer_val_size Set to the values buffer size on success.
 * @return Status
 */
Status Array::get_max_buffer_size(
    const char* name,
    const void* subarray,
    uint64_t* buffer_off_size,
    uint64_t* buffer_val_size) {
  // The array must be open ...
  if (!is_open_) {
    return LOG_STATUS(
        Status_ArrayError("Cannot get max buffer size; Array is not open"));
  }

  // ... and opened for reads
  if (query_type_ != QueryType::READ) {
    return LOG_STATUS(
        Status_ArrayError("Cannot get max buffer size; "
                          "Array was not opened in read mode"));
  }

  // A name is mandatory
  if (name == nullptr) {
    return LOG_STATUS(Status_ArrayError(
        "Cannot get max buffer size; Attribute/Dimension name is null"));
  }

  // Domain restrictions: all dimensions must share one type and be
  // fixed-sized
  const auto& domain = array_schema_latest().domain();
  if (!domain.all_dims_same_type()) {
    return LOG_STATUS(
        Status_ArrayError("Cannot get max buffer size; Function not "
                          "applicable to heterogeneous domains"));
  }
  if (!domain.all_dims_fixed()) {
    return LOG_STATUS(Status_ArrayError(
        "Cannot get max buffer size; Function not "
        "applicable to domains with variable-sized dimensions"));
  }

  // Refresh the cached sizes for this subarray
  RETURN_NOT_OK(compute_max_buffer_sizes(subarray));

  // The name must have a computed entry
  const auto entry = last_max_buffer_sizes_.find(name);
  const bool known = entry != last_max_buffer_sizes_.end();
  if (!known) {
    return LOG_STATUS(Status_ArrayError(
        std::string("Cannot get max buffer size; Attribute/Dimension '") +
        name + "' does not exist"));
  }

  // This overload serves var-sized attributes/dimensions only
  const bool is_var = array_schema_latest().var_size(name);
  if (!is_var) {
    return LOG_STATUS(Status_ArrayError(
        std::string("Cannot get max buffer size; Attribute/Dimension '") +
        name + "' is fixed-sized"));
  }

  // Hand back the (offsets, values) sizes
  const auto& sizes = entry->second;
  *buffer_off_size = sizes.first;
  *buffer_val_size = sizes.second;

  return Status::Ok();
}

Status Array::reopen() {
// Note: Array will only reopen for reads. This is why we are checking the
// timestamp for the array directory and not new components. This needs to be
Expand Down Expand Up @@ -1831,143 +1706,6 @@ Array::open_for_writes() {
return {array_schema_latest, array_schemas_all};
}

/** Drops the cached max buffer sizes together with the cached subarray. */
void Array::clear_last_max_buffer_sizes() {
  last_max_buffer_sizes_.clear();

  // Swap with an empty temporary: leaves the cached subarray empty and
  // hands its storage to the temporary, which is destroyed right away.
  decltype(last_max_buffer_sizes_subarray_)().swap(
      last_max_buffer_sizes_subarray_);
}

/**
 * Computes the maximum buffer sizes for all attributes, the coordinates and
 * all dimensions over `subarray`, caching the result in
 * `last_max_buffer_sizes_`.
 *
 * The computation is skipped when the cache is non-empty and `subarray` is
 * byte-wise equal to the subarray of the last successful computation.
 *
 * The new sizes are built in a local map and committed only on success, so
 * a failed computation leaves the cached sizes and the cached subarray
 * untouched and still consistent with each other.
 *
 * @param subarray The subarray; `2 * dim_num * coord_size` bytes are read
 *     from it.
 * @return Status
 */
Status Array::compute_max_buffer_sizes(const void* subarray) {
  // Applicable only to domains where all dimensions have the same type
  if (!array_schema_latest().domain().all_dims_same_type()) {
    return LOG_STATUS(
        Status_ArrayError("Cannot compute max buffer sizes; Inapplicable when "
                          "dimension domains have different types"));
  }

  // Allocate space for max buffer sizes subarray
  auto dim_num = array_schema_latest().dim_num();
  auto coord_size{
      array_schema_latest().domain().dimension_ptr(0)->coord_size()};
  auto subarray_size = 2 * dim_num * coord_size;
  last_max_buffer_sizes_subarray_.resize(subarray_size);

  // Nothing to do if the cache already holds the sizes for this subarray
  if (!last_max_buffer_sizes_.empty() &&
      std::memcmp(
          last_max_buffer_sizes_subarray_.data(), subarray, subarray_size) ==
          0) {
    return Status::Ok();
  }

  // Seed a fresh map with all attributes, the coordinates and all dimensions
  decltype(last_max_buffer_sizes_) sizes;
  auto& attributes = array_schema_latest().attributes();
  for (const auto& attr : attributes) {
    sizes[attr->name()] = std::pair<uint64_t, uint64_t>(0, 0);
  }
  sizes[constants::coords] = std::pair<uint64_t, uint64_t>(0, 0);
  for (unsigned d = 0; d < dim_num; ++d) {
    sizes[array_schema_latest().domain().dimension_ptr(d)->name()] =
        std::pair<uint64_t, uint64_t>(0, 0);
  }

  // Compute into the local map; on failure the cache is left as it was
  RETURN_NOT_OK(compute_max_buffer_sizes(subarray, &sizes));

  // Commit the sizes and the subarray they belong to together
  last_max_buffer_sizes_.swap(sizes);
  std::memcpy(last_max_buffer_sizes_subarray_.data(), subarray, subarray_size);

  return Status::Ok();
}

/**
 * Computes an upper bound on the buffer sizes needed to read `subarray`,
 * updating the entries already present in `buffer_sizes`.
 *
 * Remote arrays delegate the computation to the REST client. Otherwise the
 * bound is accumulated from the fragment metadata and then rectified based
 * on the number of cells in `subarray` for dense arrays and for sparse
 * arrays with integer domains and no duplicates.
 *
 * @param subarray The subarray, laid out as one [low, high] pair per
 *     dimension.
 * @param buffer_sizes Map from name to a pair of sizes, updated in place.
 * @return Status
 */
Status Array::compute_max_buffer_sizes(
    const void* subarray,
    std::unordered_map<std::string, std::pair<uint64_t, uint64_t>>*
        buffer_sizes) const {
  // Remote arrays: the REST client does the work
  if (remote_) {
    auto rest_client = resources_.rest_client();
    if (rest_client != nullptr) {
      return rest_client->get_array_max_buffer_sizes(
          array_uri_, array_schema_latest(), subarray, buffer_sizes);
    }

    return LOG_STATUS(Status_ArrayError(
        "Cannot get max buffer sizes; remote array with no REST client."));
  }

  // Hold on to the currently opened array for the duration of this call
  auto array_snapshot = opened_array_;
  auto& frag_meta = array_snapshot->fragment_metadata();
  auto& schema = array_snapshot->array_schema_latest();

  // No fragments, nothing to add
  if (frag_meta.empty()) {
    return Status::Ok();
  }

  // Rough upper bound from the fragments. Especially for dense arrays this
  // is not accurate, as it accounts only for the non-empty regions of the
  // subarray.
  for (auto& meta : frag_meta) {
    meta->add_max_buffer_sizes(*encryption_key(), subarray, buffer_sizes);
  }

  // Slice the raw subarray bytes into one range per dimension
  const auto dim_num = schema.dim_num();
  NDRange sub(dim_num);
  const auto* bytes = static_cast<const unsigned char*>(subarray);
  uint64_t offset = 0;
  for (unsigned d = 0; d < dim_num; ++d) {
    auto r_size{2 * schema.dimension_ptr(d)->coord_size()};
    sub[d] = Range(bytes + offset, r_size);
    offset += r_size;
  }

  if (schema.dense()) {
    // Rectify bound for dense arrays.
    // `cell_num` becomes 0 when `subarray` is huge, leading to a
    // `uint64_t` overflow.
    const auto cell_num = schema.domain().cell_num(sub);
    if (cell_num != 0) {
      for (auto& entry : *buffer_sizes) {
        const auto& field = entry.first;
        auto& sizes = entry.second;
        if (schema.var_size(field)) {
          sizes.first = cell_num * constants::cell_var_offset_size;
          sizes.second += cell_num * datatype_size(schema.type(field));
        } else {
          sizes.first = cell_num * schema.cell_size(field);
        }
      }
    }
  } else if (!schema.allows_dups() && schema.domain().all_dims_int()) {
    // Rectify bound for sparse arrays with integer domain, without
    // duplicates. `cell_num` becomes 0 when `subarray` is huge, leading to a
    // `uint64_t` overflow.
    const auto cell_num = schema.domain().cell_num(sub);
    if (cell_num != 0) {
      for (auto& entry : *buffer_sizes) {
        const auto& field = entry.first;
        if (schema.var_size(field)) {
          continue;
        }

        // Skip the entry if the product overflows
        const auto cell_size = schema.cell_size(field);
        const uint64_t bound = cell_num * cell_size;
        if (bound / cell_size != cell_num) {
          continue;
        }

        // Potentially rectify size
        entry.second.first = std::min(entry.second.first, bound);
      }
    }
  }

  return Status::Ok();
}

void Array::do_load_metadata() {
if (!array_directory().loaded()) {
throw ArrayException(
Expand Down
55 changes: 0 additions & 55 deletions tiledb/sm/array/array.h
Original file line number Diff line number Diff line change
Expand Up @@ -617,23 +617,6 @@ class Array {
/** Retrieves the query type. Throws if the array is not open. */
QueryType get_query_type() const;

/**
 * Returns the max buffer size given a fixed-sized attribute/dimension and
 * a subarray.
 *
 * Errors if the array is not open, was not opened in read mode, `name` is
 * null, the domain is heterogeneous or has var-sized dimensions, `name`
 * does not exist, or `name` is var-sized.
 *
 * @param name The attribute/dimension name (the special coordinates name is
 *     accepted as well).
 * @param subarray The subarray to compute the size for.
 * @param buffer_size Set to the max buffer size on success.
 * @return Status
 */
Status get_max_buffer_size(
const char* name, const void* subarray, uint64_t* buffer_size);

/**
 * Returns the max buffer sizes given a var-sized attribute/dimension and
 * a subarray.
 *
 * Errors if the array is not open, was not opened in read mode, `name` is
 * null, the domain is heterogeneous or has var-sized dimensions, `name`
 * does not exist, or `name` is fixed-sized.
 *
 * @param name The attribute/dimension name.
 * @param subarray The subarray to compute the sizes for.
 * @param buffer_off_size Set to the max offsets buffer size on success.
 * @param buffer_val_size Set to the max values buffer size on success.
 * @return Status
 */
Status get_max_buffer_size(
const char* name,
const void* subarray,
uint64_t* buffer_off_size,
uint64_t* buffer_val_size);

/**
* Returns a reference to the private encryption key.
*/
Expand Down Expand Up @@ -1117,16 +1100,6 @@ class Array {
/** The array config. */
Config config_;

/**
 * Stores the max buffer sizes requested last time by the user, keyed by
 * attribute/dimension (or coordinates) name. For fixed-sized entries only
 * the first size of the pair is used; for var-sized entries the first is
 * the offsets size and the second is the values size.
 */
std::unordered_map<std::string, std::pair<uint64_t, uint64_t>>
last_max_buffer_sizes_;

/**
 * This is the last subarray used by the user to retrieve the
 * max buffer sizes, kept as raw bytes (`2 * dim_num * coord_size` of them)
 * so that it can be compared byte-wise against the next requested subarray.
 */
std::vector<uint8_t> last_max_buffer_sizes_subarray_;

/** True if the array is remote (has `tiledb://` URI scheme). */
bool remote_;

Expand Down Expand Up @@ -1199,34 +1172,6 @@ class Array {
std::unordered_map<std::string, shared_ptr<ArraySchema>>>
open_for_writes();

/**
 * Clears the cached max buffer sizes and the cached subarray, and requests
 * that the subarray's storage be released.
 */
void clear_last_max_buffer_sizes();

/**
 * Computes the maximum buffer sizes for all attributes, the coordinates and
 * all dimensions given a subarray, which are cached locally in the instance.
 *
 * The sizes are recomputed only if the cache is empty or `subarray` differs
 * byte-wise from the subarray cached by the previous call. Errors if the
 * dimensions do not all have the same type.
 *
 * @param subarray The subarray to compute the sizes for.
 * @return Status
 */
Status compute_max_buffer_sizes(const void* subarray);

/**
 * Computes an upper bound on the buffer sizes required for a read
 * query, for a given subarray and set of attributes. Note that
 * the attributes are already set in `max_buffer_sizes_`.
 *
 * For remote arrays the computation is delegated to the REST client, and
 * an error is returned if there is no REST client. If the array has no
 * fragment metadata, the sizes are left as passed in.
 *
 * @param subarray The subarray to focus on. Note that it must have the same
 *     underlying type as the array domain.
 * @param max_buffer_sizes_ The buffer sizes to be retrieved. This is a map
 *     from the attribute (or coordinates) name to a pair of sizes (in
 *     bytes). For fixed-sized attributes, the second size is ignored. For
 *     var-sized attributes, the first is the offsets size and the second is
 *     the values size.
 * @return Status
 */
Status compute_max_buffer_sizes(
const void* subarray,
std::unordered_map<std::string, std::pair<uint64_t, uint64_t>>*
max_buffer_sizes_) const;

/**
* Load non-remote array metadata.
*/
Expand Down
Loading

0 comments on commit c857b28

Please sign in to comment.