Skip to content

Commit

Permalink
operator
Browse files Browse the repository at this point in the history
  • Loading branch information
cubicYYY committed Sep 2, 2024
1 parent 195760f commit f29a7bd
Show file tree
Hide file tree
Showing 16 changed files with 782 additions and 189 deletions.
1 change: 1 addition & 0 deletions include/api.h
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
#pragma once

#include "froaring_api/and.h"
#include "froaring_api/equal.h"
5 changes: 5 additions & 0 deletions include/array_container.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,11 @@ class ArrayContainer : public froaring_container_t {
public:
using IndexOrNumType = froaring::can_fit_t<DataBits>;
using SizeType = froaring::can_fit_t<DataBits + 1>;
/// Bit capacity for containers indexed
static constexpr size_t ContainerCapacity = (1 << DataBits);
/// Array threshold (Array will not be optimum for storing more
/// elements)
static constexpr size_t ArrayToBitmapCountThreshold = ContainerCapacity / DataBits;

public:
void debug_print() {
Expand Down
168 changes: 130 additions & 38 deletions include/binsearch_index.h
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
#pragma once

#include "handle.h"
#include "utils.h"
#include "transform.h"
#include "utils.h"

namespace froaring {

Expand All @@ -29,15 +29,7 @@ class BinsearchIndex : public froaring_container_t {
using CTy = froaring::ContainerType;
using ContainerHandle = froaring::ContainerHandle<IndexType>;

/// Bit capacity for containers indexed
static constexpr size_t ContainerCapacity = (1 << DataBits);
/// Array threshold (Array will not be optimum for storing more
/// elements)
static constexpr size_t ArrayToBitmapCountThreshold = ContainerCapacity / DataBits;
/// RLE threshold (RLE will not be optimum for more runs)
static constexpr size_t RleToBitmapRunThreshold = ContainerCapacity / (DataBits * 2);
public:

BinsearchIndex(SizeType capacity = CONTAINERS_INIT_CAPACITY, SizeType size = 0)
: capacity(capacity),
size(size),
Expand All @@ -47,17 +39,28 @@ class BinsearchIndex : public froaring_container_t {

void debug_print() const {
for (SizeType i = 0; i < size; ++i) {
std::cout << "Index: " << containers[i].index << " Type: " << static_cast<int>(containers[i].type) << std::endl;
std::cout << "Index: " << containers[i].index << " Type: " << static_cast<int>(containers[i].type)
<< " Card.:";
switch (containers[i].type) {
case CTy::RLE:
CAST_TO_RLE(containers[i].ptr)->debug_print();
case CTy::RLE: {
std::cout << int(static_cast<RLEContainer<WordType, DataBits>*>(containers[i].ptr)->cardinality())
<< std::endl;
static_cast<RLEContainer<WordType, DataBits>*>(containers[i].ptr)->debug_print();
break;
case CTy::Array:
CAST_TO_ARRAY(containers[i].ptr)->debug_print();
}
case CTy::Array: {
std::cout << int(static_cast<ArrayContainer<WordType, DataBits>*>(containers[i].ptr)->cardinality())
<< std::endl;
static_cast<ArrayContainer<WordType, DataBits>*>(containers[i].ptr)->debug_print();
break;
case CTy::Bitmap:
CAST_TO_BITMAP(containers[i].ptr)->debug_print();
}
case CTy::Bitmap: {
std::cout
<< int(static_cast<BitmapContainer<WordType, DataBits>*>(containers[i].ptr)->cardinality())
<< std::endl;
static_cast<BitmapContainer<WordType, DataBits>*>(containers[i].ptr)->debug_print();
break;
}
default:
FROARING_UNREACHABLE
}
Expand Down Expand Up @@ -106,15 +109,14 @@ class BinsearchIndex : public froaring_container_t {
// Now we found the corresponding container
switch (containers[entry_pos].type) {
case CTy::RLE:
return CAST_TO_RLE(containers[entry_pos].ptr)->test(data);
return static_cast<RLEContainer<WordType, DataBits>*>(containers[entry_pos].ptr)->test(data);
case CTy::Array:
return CAST_TO_ARRAY(containers[entry_pos].ptr)->test(data);
return static_cast<ArrayContainer<WordType, DataBits>*>(containers[entry_pos].ptr)->test(data);
case CTy::Bitmap:
return CAST_TO_BITMAP(containers[entry_pos].ptr)->test(data);
return static_cast<BitmapContainer<WordType, DataBits>*>(containers[entry_pos].ptr)->test(data);
default:
FROARING_UNREACHABLE
}
return false;
}

// Set a value in the corresponding container
Expand All @@ -141,14 +143,14 @@ class BinsearchIndex : public froaring_container_t {
// Now we found the corresponding container
switch (containers[pos].type) {
case CTy::RLE: {
CAST_TO_RLE(containers[pos].ptr)->set(data);
static_cast<RLEContainer<WordType, DataBits>*>(containers[pos].ptr)->set(data);
break;
}
case CTy::Array: {
auto array_ptr = CAST_TO_ARRAY(containers[pos].ptr);
array_ptr->set(data);
auto array_ptr = static_cast<ArrayContainer<WordType, DataBits>*>(containers[pos].ptr);
array_ptr->set(data);
// Transform into a bitmap container if it gets bigger
if (array_ptr->size >= ArrayToBitmapCountThreshold) {
if (array_ptr->size >= ArraySized::ArrayToBitmapCountThreshold) {
auto new_bitmap = froaring_array_to_bitmap<WordType, DataBits>(array_ptr);
delete containers[pos].ptr;
containers[pos].ptr = new_bitmap;
Expand All @@ -157,28 +159,73 @@ class BinsearchIndex : public froaring_container_t {
break;
}
case CTy::Bitmap: {
CAST_TO_BITMAP(containers[pos].ptr)->set(data);
static_cast<BitmapContainer<WordType, DataBits>*>(containers[pos].ptr)->set(data);
break;
}
default:
FROARING_UNREACHABLE
}
}

bool test_and_set(ValueType value) {
can_fit_t<IndexBits> index;
can_fit_t<DataBits> data;
num2index_n_data<IndexBits, DataBits>(value, index, data);

SizeType pos = lower_bound(index);

// Not found, insert a new container:
if (pos == size || containers[pos].index != index) {
if (size == capacity) {
expand();
}
std::memmove(&containers[pos + 1], &containers[pos], (size - pos) * sizeof(ContainerHandle));
auto array_ptr = new ArraySized(ARRAY_CONTAINER_INIT_CAPACITY, 1);
array_ptr->vals[0] = data;
containers[pos] = ContainerHandle(array_ptr, CTy::Array, index);
size++;
return true;
}

// Now we found the corresponding container
switch (containers[pos].type) {
case CTy::RLE: {
return static_cast<RLEContainer<WordType, DataBits>*>(containers[pos].ptr)->test_and_set(data);
}
case CTy::Array: {
auto array_ptr = static_cast<ArrayContainer<WordType, DataBits>*>(containers[pos].ptr);
bool was_set = array_ptr->test_and_set(data);
// Transform into a bitmap container if it gets bigger
if (array_ptr->size >= ArraySized::ArrayToBitmapCountThreshold) {
auto new_bitmap = froaring_array_to_bitmap<WordType, DataBits>(array_ptr);
delete containers[pos].ptr;
containers[pos].ptr = new_bitmap;
containers[pos].type = CTy::Bitmap;
}
return was_set;
}
case CTy::Bitmap: {
return static_cast<BitmapContainer<WordType, DataBits>*>(containers[pos].ptr)->test_and_set(data);
}
default:
FROARING_UNREACHABLE
}
}

// Calculate the total cardinality of all containers
size_t cardinality() const {
size_t total = 0;
for (SizeType i = 0; i < size; ++i) {
auto& entry = containers[i];
switch (entry.type) {
case CTy::RLE:
total += CAST_TO_RLE(entry.ptr)->cardinality();
total += static_cast<RLEContainer<WordType, DataBits>*>(entry.ptr)->cardinality();
break;
case CTy::Array:
total += CAST_TO_ARRAY(entry.ptr)->cardinality();
total += static_cast<ArrayContainer<WordType, DataBits>*>(entry.ptr)->cardinality();
break;
case CTy::Bitmap:
total += CAST_TO_BITMAP(entry.ptr)->cardinality();
total += static_cast<BitmapContainer<WordType, DataBits>*>(entry.ptr)->cardinality();
break;
default:
FROARING_UNREACHABLE
Expand All @@ -203,8 +250,8 @@ class BinsearchIndex : public froaring_container_t {
// Now we found the corresponding container
switch (entry.type) {
case CTy::RLE: {
CAST_TO_RLE(entry.ptr)->reset(data);
if (CAST_TO_RLE(entry.ptr)->run_count == 0) {
static_cast<RLEContainer<WordType, DataBits>*>(entry.ptr)->reset(data);
if (static_cast<RLEContainer<WordType, DataBits>*>(entry.ptr)->run_count == 0) {
delete entry.ptr;
if (size > 1) {
std::memmove(&containers[pos], &containers[pos + 1],
Expand All @@ -215,8 +262,8 @@ class BinsearchIndex : public froaring_container_t {
break;
}
case CTy::Array: {
CAST_TO_ARRAY(entry.ptr)->reset(data);
if (CAST_TO_ARRAY(entry.ptr)->size == 0) {
static_cast<ArrayContainer<WordType, DataBits>*>(entry.ptr)->reset(data);
if (static_cast<ArrayContainer<WordType, DataBits>*>(entry.ptr)->size == 0) {
delete entry.ptr;
if (size > 1) {
std::memmove(&containers[pos], &containers[pos + 1],
Expand All @@ -227,8 +274,8 @@ class BinsearchIndex : public froaring_container_t {
break;
}
case CTy::Bitmap: {
CAST_TO_BITMAP(entry.ptr)->reset(data);
if (CAST_TO_BITMAP(entry.ptr)->cardinality() == 0) {
static_cast<BitmapContainer<WordType, DataBits>*>(entry.ptr)->reset(data);
if (static_cast<BitmapContainer<WordType, DataBits>*>(entry.ptr)->cardinality() == 0) {
delete entry.ptr;
if (size > 1) {
std::memmove(&containers[pos], &containers[pos + 1],
Expand All @@ -243,19 +290,18 @@ class BinsearchIndex : public froaring_container_t {
}
}


// Release all containers
~BinsearchIndex() {
for (SizeType i = 0; i < size; ++i) {
switch (containers[i].type) {
case CTy::Array:
CAST_TO_ARRAY(containers[i].ptr)->~ArrayContainer();
static_cast<ArrayContainer<WordType, DataBits>*>(containers[i].ptr)->~ArrayContainer();
break;
case CTy::Bitmap:
CAST_TO_BITMAP(containers[i].ptr)->~BitmapContainer();
static_cast<BitmapContainer<WordType, DataBits>*>(containers[i].ptr)->~BitmapContainer();
break;
case CTy::RLE:
CAST_TO_RLE(containers[i].ptr)->~RLEContainer();
static_cast<RLEContainer<WordType, DataBits>*>(containers[i].ptr)->~RLEContainer();
break;
default:
FROARING_UNREACHABLE
Expand All @@ -265,6 +311,52 @@ class BinsearchIndex : public froaring_container_t {
free(containers);
}

static BinsearchIndex<WordType, IndexBits, DataBits>* and_(const BinsearchIndex<WordType, IndexBits, DataBits>* a,
const BinsearchIndex<WordType, IndexBits, DataBits>* b,
CTy& res_type) {
auto result = new BinsearchIndex<WordType, IndexBits, DataBits>(a->capacity, 0);
size_t i = 0, j = 0;
size_t new_container_counts = 0;
while (i < a->size && j < b->size) {
if (a->containers[i].index < b->containers[j].index) {
i++;
} else if (a->containers[i].index > b->containers[j].index) {
j++;
} else {
CTy res_type;
auto res = froaring_and<WordType, DataBits>(a->containers[i].ptr, b->containers[j].ptr,
a->containers[i].type, b->containers[j].type, res_type);
result->containers[new_container_counts++] = ContainerHandle(res, res_type, a->containers[i].index);
i++;
j++;
}
}
result->size = new_container_counts;
res_type = CTy::Containers;
// TODO: convert to single container if possible
return result;
}
static bool equals(const BinsearchIndex<WordType, IndexBits, DataBits>* a,
const BinsearchIndex<WordType, IndexBits, DataBits>* b) {
if (a->size != b->size) {
return false;
}

for (size_t i = 0; i < a->size; ++i) { // quick check
if (a->containers[i].index != b->containers[i].index) {
return false;
}
}
for (size_t i = 0; i < a->size; ++i) {
auto res = froaring_equal<WordType, DataBits>(a->containers[i].ptr, b->containers[i].ptr,
a->containers[i].type, b->containers[i].type);
if (!res) {
return false;
}
}
return true;
}

private:
void expand() {
capacity *= 2;
Expand Down
Loading

0 comments on commit f29a7bd

Please sign in to comment.