Skip to content

Commit

Permalink
add microbenchmark
Browse files Browse the repository at this point in the history
Signed-off-by: Lloyd-Pottiger <[email protected]>
  • Loading branch information
Lloyd-Pottiger committed Nov 27, 2024
1 parent d30951f commit 895cea3
Show file tree
Hide file tree
Showing 2 changed files with 171 additions and 7 deletions.
8 changes: 7 additions & 1 deletion dbms/src/Columns/tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,5 +12,11 @@
# See the License for the specific language governing permissions and
# limitations under the License.

# Attach the Haswell-specific compiler flag to the perf-test source.
# NOTE(review): presumably the flag is only applied when TIFLASH_ENABLE_ARCH_HASWELL_SUPPORT
# is enabled — confirm against the definition of check_then_add_sources_compile_flag.
check_then_add_sources_compile_flag (
TIFLASH_ENABLE_ARCH_HASWELL_SUPPORT
"${TIFLASH_COMPILER_ARCH_HASWELL_FLAG}"
column_vector_perftest.cpp
)

# Stand-alone executable built from column_vector_perftest.cpp.
add_executable (column_vector_perftest column_vector_perftest.cpp)
# NOTE(review): this diff view lists both the pre-commit link line (dbms only) and the
# post-commit one (dbms + delta_merge); presumably only the latter remains in the file — confirm.
target_link_libraries (column_vector_perftest dbms)
target_link_libraries (column_vector_perftest dbms delta_merge)
170 changes: 164 additions & 6 deletions dbms/src/Columns/tests/column_vector_perftest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,13 @@
// limitations under the License.

#include <Columns/ColumnVector.h>
#include <Columns/ColumnsCommon.h>
#include <Common/typeid_cast.h>
#include <common/types.h>
#include <fmt/core.h>

#include <atomic>
#include <chrono>
#include <memory>
#include <random>
#include <stdexcept>
#include <thread>

Expand All @@ -30,6 +31,103 @@ std::random_device rd;

using StopFlag = std::atomic<bool>;

/// Baseline ("V1") filter for an Int64 column, used as one side of the filter micro-benchmark.
///
/// Copies every element of `col` whose corresponding byte in `filt` is non-zero into a
/// freshly created ColumnVector<Int64>, preserving the original order.
///
/// @param col               Source column. It is cast to ColumnVector<Int64> with the reference
///                          form of typeid_cast, which is expected to report a type mismatch by
///                          throwing rather than yielding a null pointer (see Common/typeid_cast.h).
/// @param filt              One byte per row; any non-zero value keeps the row.
/// @param result_size_hint  0: reserve nothing; negative: reserve countBytesInFilter(filt)
///                          elements; positive: reserve exactly that many elements.
/// @return                  The new column holding the kept rows.
/// @throws Exception        With SIZES_OF_COLUMNS_DOESNT_MATCH when filt.size() != col->size().
ColumnPtr filterV1(ColumnPtr & col, IColumn::Filter & filt, ssize_t result_size_hint)
{
    /// Reference-form cast: the pointer form was dereferenced without a null check.
    const auto & data = typeid_cast<const ColumnVector<Int64> &>(*col).getData();
    size_t size = col->size();
    if (size != filt.size())
        throw Exception("Size of filter doesn't match size of column.", ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH);

    auto res = ColumnVector<Int64>::create();
    using Container = ColumnVector<Int64>::Container;
    Container & res_data = res->getData();

    if (result_size_hint)
    {
        if (result_size_hint < 0)
            result_size_hint = countBytesInFilter(filt);
        res_data.reserve(result_size_hint);
    }

    const UInt8 * filt_pos = &filt[0];
    const UInt8 * filt_end = filt_pos + size;
    const Int64 * data_pos = &data[0];

#if __SSE2__
    /** A slightly more optimized version.
      * Based on the assumption that often pieces of consecutive values
      *  completely pass or do not pass the filter.
      * Therefore, we will optimistically check the parts of `SIMD_BYTES` values.
      */
    static constexpr size_t SIMD_BYTES = 16;
    const __m128i zero16 = _mm_setzero_si128();
    const UInt8 * filt_end_sse = filt_pos + size / SIMD_BYTES * SIMD_BYTES;

    while (filt_pos < filt_end_sse)
    {
        /// Bit i of `mask` is set iff filt_pos[i] != 0.
        /// The bytes are compared for equality with zero and the result is inverted. A signed
        /// "greater than zero" comparison would treat bytes >= 0x80 as unset, and a chunk whose
        /// non-zero bytes are all >= 0x80 would be dropped, unlike in the scalar tail loop below.
        const int zero_mask = _mm_movemask_epi8(
            _mm_cmpeq_epi8(_mm_loadu_si128(reinterpret_cast<const __m128i *>(filt_pos)), zero16));
        const int mask = ~zero_mask & 0xFFFF;

        if (0xFFFF == mask)
        {
            /// Whole chunk passes: append all SIMD_BYTES values at once.
            res_data.insert(data_pos, data_pos + SIMD_BYTES);
        }
        else if (0 != mask)
        {
            /// Mixed chunk: fall back to a per-element check.
            for (size_t i = 0; i < SIMD_BYTES; ++i)
                if (filt_pos[i])
                    res_data.push_back(data_pos[i]);
        }
        /// mask == 0: nothing is inserted.

        filt_pos += SIMD_BYTES;
        data_pos += SIMD_BYTES;
    }
#endif

    /// Scalar tail (or the whole input when SSE2 is unavailable).
    while (filt_pos < filt_end)
    {
        if (*filt_pos)
            res_data.push_back(*data_pos);

        ++filt_pos;
        ++data_pos;
    }

    return res;
}

/// "V2" filter for an Int64 column, used as the other side of the filter micro-benchmark.
///
/// Performs the same argument checks and reservation as the V1 variant, but delegates the
/// actual element selection to filterImpl().
///
/// @param col               Source column. It is cast to ColumnVector<Int64> with the reference
///                          form of typeid_cast, which is expected to report a type mismatch by
///                          throwing rather than yielding a null pointer (see Common/typeid_cast.h).
/// @param filt              One byte per row, handed to filterImpl() as a [begin, end) byte range.
/// @param result_size_hint  0: reserve nothing; negative: reserve countBytesInFilter(filt)
///                          elements; positive: reserve exactly that many elements.
/// @return                  The new column filled by filterImpl().
/// @throws Exception        With SIZES_OF_COLUMNS_DOESNT_MATCH when filt.size() != col->size().
ColumnPtr filterV2(ColumnPtr & col, IColumn::Filter & filt, ssize_t result_size_hint)
{
    /// Reference-form cast: the pointer form was dereferenced without a null check.
    const auto & data = typeid_cast<const ColumnVector<Int64> &>(*col).getData();
    size_t size = col->size();
    if (size != filt.size())
        throw Exception("Size of filter doesn't match size of column.", ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH);

    auto res = ColumnVector<Int64>::create();
    using Container = ColumnVector<Int64>::Container;
    Container & res_data = res->getData();

    if (result_size_hint)
    {
        if (result_size_hint < 0)
            result_size_hint = countBytesInFilter(filt);
        res_data.reserve(result_size_hint);
    }

    const UInt8 * filt_pos = &filt[0];
    const UInt8 * filt_end = filt_pos + size;
    const Int64 * data_pos = &data[0];

    filterImpl(filt_pos, filt_end, data_pos, res_data);

    return res;
}

template <typename T>
ColumnPtr buildColumn(int n)
{
Expand All @@ -56,6 +154,66 @@ IColumn::Selector buildSelector(int num_rows, int num_columns)
return selector;
}

/// Builds a random row filter for the filter micro-benchmark.
///
/// Each row draws an integer uniformly from [1, 100] and is kept (filter byte 1) when that
/// value is divisible by `num_columns`, otherwise dropped (filter byte 0). Consequently
/// `num_columns == 1` keeps every row and any `num_columns` above 100 keeps none.
///
/// @param num_rows     Number of filter entries to generate; must be non-negative.
/// @param num_columns  Divisor controlling selectivity; must be non-zero.
/// @return             A filter with `num_rows` entries.
/// @throws std::invalid_argument if `num_rows` is negative or `num_columns` is zero.
IColumn::Filter buildFilter(int num_rows, int num_columns)
{
    /// A negative size would be converted to a huge unsigned value by resize().
    if (num_rows < 0)
        throw std::invalid_argument(fmt::format("buildFilter: num_rows must be non-negative, got {}", num_rows));
    /// `x % 0` is undefined behavior for integers.
    if (num_columns == 0)
        throw std::invalid_argument("buildFilter: num_columns must be non-zero");

    std::mt19937 mt(rd());
    std::uniform_int_distribution<int> dist(1, 100);

    IColumn::Filter filter;
    filter.resize(num_rows);
    for (int i = 0; i < num_rows; ++i)
        filter[i] = dist(mt) % num_columns == 0;
    return filter;
}

template <typename T>
void testFilter(int num_rows, int num_columns, int seconds)
{
ColumnPtr src = buildColumn<T>(num_rows);
auto filter = buildFilter(num_rows, num_columns);

StopFlag stop_flag = false;
std::vector<std::atomic<Int64>> counters(2);
for (auto & counter : counters)
counter = 0;

const std::vector<std::function<void()>> filter_func
= {[&] {
while (!stop_flag.load(std::memory_order_relaxed))
{
filterV1(src, filter, -1);
counters[0].fetch_add(1, std::memory_order_relaxed);
}
},
[&] {
while (!stop_flag.load(std::memory_order_relaxed))
{
filterV2(src, filter, -1);
counters[1].fetch_add(1, std::memory_order_relaxed);
}
}};

std::vector<std::thread> threads;
threads.reserve(filter_func.size());
auto start = std::chrono::high_resolution_clock::now();
for (const auto & f : filter_func)
{
threads.emplace_back(f);
}

std::this_thread::sleep_for(std::chrono::seconds(seconds));
stop_flag.store(true);
for (auto & t : threads)
t.join();

auto cur = std::chrono::high_resolution_clock::now();
auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(cur - start);
for (size_t i = 0; i < counters.size(); ++i)
{
std::cout << fmt::format("FilterV{}: {:<10}", i + 1, counters[i].load() * 1000 / duration.count()) << std::endl;
}
}

template <typename T>
void testScatter(int num_rows, int num_columns, int seconds)
{
Expand Down Expand Up @@ -100,14 +258,14 @@ int main(int argc [[maybe_unused]], char ** argv [[maybe_unused]])

using TestHandler = std::function<void(int rows, int columns, int seconds)>;
static const std::unordered_map<String, std::unordered_map<String, TestHandler>> handlers
= {{"int", {{"scatter", DB::tests::testScatter<Int32>}}},
{"int64", {{"scatter", DB::tests::testScatter<Int64>}}}};
= {{"int", {{"scatter", DB::tests::testScatter<Int32>}, {"filter", DB::tests::testFilter<Int64>}}},
{"int64", {{"scatter", DB::tests::testScatter<Int64>}, {"filter", DB::tests::testFilter<Int64>}}}};

String type_name = argv[1];
String method = argv[2];
int rows = argc >= 4 ? atoi(argv[3]) : 10000;
int columns = argc >= 5 ? atoi(argv[4]) : 5;
int seconds = argc >= 6 ? atoi(argv[5]) : 10;
int rows = argc >= 4 ? std::stoi(argv[3]) : 10000;
int columns = argc >= 5 ? std::stoi(argv[4]) : 5;
int seconds = argc >= 6 ? std::stoi(argv[5]) : 10;

const auto & find_handler = [](const String & title, const String & name, const auto & handler_map) {
auto it = handler_map.find(name);
Expand Down

0 comments on commit 895cea3

Please sign in to comment.