Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add benchmark to test different chunk/shard/compression/storage configurations #22

Merged
merged 1 commit into from
Dec 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF)

option(BUILD_PYTHON "Build Python bindings" OFF)
option(BUILD_BENCHMARK "Build benchmarks" OFF)

if(CMAKE_PROJECT_NAME STREQUAL PROJECT_NAME)
include(CTest)
Expand All @@ -33,7 +34,13 @@ else ()
message(STATUS "Skipping test targets")
endif ()

if (${BUILD_PYTHON})
if (BUILD_BENCHMARK)
add_subdirectory(benchmarks)
else ()
message(STATUS "Skipping benchmarks")
endif ()

if (BUILD_PYTHON)
add_subdirectory(python)
else ()
message(STATUS "Skipping Python bindings")
Expand Down
18 changes: 18 additions & 0 deletions benchmarks/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Build script for the benchmark executable (acquire-zarr-benchmark), compiled
# from the single source file benchmark.cpp.
# NOTE(review): `project` is not referenced anywhere else in this file --
# confirm whether it is still needed.
set(project acquire-zarr)

set(tgt acquire-zarr-benchmark)
add_executable(${tgt} benchmark.cpp)
# Defines the preprocessor macro TEST as the quoted target name.
target_compile_definitions(${tgt} PUBLIC "TEST=\"${tgt}\"")
# With MSVC, select the statically linked multi-threaded runtime
# (the Debug variant for Debug configurations).
set_target_properties(${tgt} PROPERTIES
MSVC_RUNTIME_LIBRARY "MultiThreaded$<$<CONFIG:Debug>:Debug>"
)
# Header search paths: the project's include directory and the logger sources.
target_include_directories(${tgt} PRIVATE
${PROJECT_SOURCE_DIR}/include
${PROJECT_SOURCE_DIR}/src/logger
)
# NOTE(review): benchmark.cpp only includes acquire.zarr.h and standard headers;
# the JSON and MinIO libraries are presumably needed to satisfy the static
# acquire-zarr library at link time -- confirm.
target_link_libraries(${tgt} PRIVATE
acquire-logger
acquire-zarr
nlohmann_json::nlohmann_json
miniocpp::miniocpp
)
233 changes: 233 additions & 0 deletions benchmarks/benchmark.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,233 @@
#include "acquire.zarr.h"

#include <chrono>
#include <cmath>
#include <cstdint>
#include <cstdlib>
#include <filesystem>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <string>
#include <system_error>
#include <vector>

// Expands to a braced, designated-initializer list that sets the fields
// name, type, array_size_px, chunk_size_px and shard_size_chunks, in that
// order. In this file it is assigned to the elements of `settings.dimensions`.
//
// The parameters are deliberately spelled differently from the field names
// (`name_` vs `.name`, `type_` vs `.type`, ...): a parameter with the same
// spelling as a field would also be substituted inside the designator.
// Every argument is parenthesized so that expression arguments expand safely.
#define DIM(name_, type_, array_size, chunk_size, shard_size) \
{ .name = (name_), \
.type = (type_), \
.array_size_px = (array_size), \
.chunk_size_px = (chunk_size), \
.shard_size_chunks = (shard_size) }

namespace fs = std::filesystem;

/// Chunk extent along each of the five dataset dimensions, listed in the
/// order t, c, z, y, x (the order used for aggregate initialization).
struct ChunkConfig
{
    unsigned int t; ///< chunk size along the time dimension
    unsigned int c; ///< chunk size along the channel dimension
    unsigned int z; ///< chunk size along the z (plane) dimension
    unsigned int y; ///< chunk size along the y (height) dimension
    unsigned int x; ///< chunk size along the x (width) dimension
};

/// Chunk shapes exercised by the benchmark, each given as { t, c, z, y, x }.
/// Only the spatial extents vary; t and c are always chunked one at a time.
const std::vector<ChunkConfig> CHUNK_CONFIGS{
    { 1, 1, 64, 64, 64 },
    { 1, 1, 128, 128, 128 },
    { 1, 1, 256, 256, 256 },
};

// Full extent of the benchmark array along each dimension.
constexpr unsigned int ARRAY_WIDTH = 1920;    // x
constexpr unsigned int ARRAY_HEIGHT = 1080;   // y
constexpr unsigned int ARRAY_PLANES = 6;      // z
constexpr unsigned int ARRAY_CHANNELS = 3;    // c
constexpr unsigned int ARRAY_TIMEPOINTS = 10; // t

// Number of timed repetitions per benchmark configuration.
constexpr unsigned int NUM_RUNS = 5;

/// One benchmark scenario. This is an aggregate: callers brace-initialize it
/// positionally, so the member order below is part of its interface.
struct BenchmarkConfig
{
    ChunkConfig chunk;               ///< chunk shape for every dimension
    int zarr_version;                ///< cast to ZarrVersion when the stream is set up
    std::string compression;         ///< "none", "lz4", or anything else (treated as zstd)
    std::string storage;             ///< "filesystem" or "s3"
    unsigned int chunks_per_shard_x; ///< shard size, in chunks, along x
    unsigned int chunks_per_shard_y; ///< shard size, in chunks, along y

    // S3 connection details; only read when storage == "s3".
    std::string s3_endpoint;
    std::string s3_bucket;
    std::string s3_access_key;
    std::string s3_secret_key;
};

/**
 * @brief Stopwatch that starts running when it is constructed.
 *
 * Uses std::chrono::steady_clock: it is guaranteed to be monotonic, whereas
 * high_resolution_clock may be an alias for the adjustable system clock and
 * can therefore yield skewed (even negative) intervals.
 */
class Timer
{
    using Clock = std::chrono::steady_clock;
    Clock::time_point start;

  public:
    /// Record the construction time as the start of the measured interval.
    Timer()
      : start(Clock::now())
    {
    }

    /**
     * @brief Time elapsed since construction.
     * @return Elapsed time in seconds, as a floating-point value.
     */
    [[nodiscard]] double elapsed() const
    {
        const std::chrono::duration<double> seconds = Clock::now() - start;
        return seconds.count();
    }
};

/**
 * @brief Create a Zarr stream for the benchmark scenario described by @p config.
 *
 * The stream always writes uint16 data to the store path "benchmark.zarr" and
 * has five dimensions (t, c, z, y, x) sized by the ARRAY_* constants. Chunk
 * sizes come from `config.chunk`; only y and x are sharded by more than one
 * chunk.
 *
 * @param config Scenario to configure: Zarr version, compression ("none",
 *               "lz4", anything else selects zstd), storage ("s3" enables the
 *               S3 settings), chunk shape and chunks per shard.
 * @return The new stream, or nullptr if the dimension array could not be
 *         allocated or ZarrStream_create() failed. The caller releases it with
 *         ZarrStream_destroy().
 */
ZarrStream*
setup_stream(const BenchmarkConfig& config)
{
    ZarrStreamSettings settings = { .store_path = "benchmark.zarr",
                                    .s3_settings = nullptr,
                                    .compression_settings = nullptr,
                                    .data_type = ZarrDataType_uint16,
                                    .version = static_cast<ZarrVersion>(
                                      config.zarr_version) };

    // Must outlive the ZarrStream_create() call below: `settings` only stores
    // a pointer to it.
    ZarrCompressionSettings comp_settings = {};
    if (config.compression != "none") {
        comp_settings.compressor = ZarrCompressor_Blosc1;
        comp_settings.codec = config.compression == "lz4"
                                ? ZarrCompressionCodec_BloscLZ4
                                : ZarrCompressionCodec_BloscZstd;
        comp_settings.level = 1;
        comp_settings.shuffle = 1;
        settings.compression_settings = &comp_settings;
    }

    // Same lifetime requirement as comp_settings. The c_str() pointers stay
    // valid because `config` outlives this function.
    ZarrS3Settings s3_settings = {};
    if (config.storage == "s3") {
        s3_settings = {
            .endpoint = config.s3_endpoint.c_str(),
            .bucket_name = config.s3_bucket.c_str(),
            .access_key_id = config.s3_access_key.c_str(),
            .secret_access_key = config.s3_secret_key.c_str(),
        };
        settings.s3_settings = &s3_settings;
    }

    // Bail out instead of writing through an unallocated dimension array.
    if (ZarrStreamSettings_create_dimension_array(&settings, 5) !=
        ZarrStatusCode_Success) {
        return nullptr;
    }
    auto* dims = settings.dimensions;

    dims[0] =
      DIM("t", ZarrDimensionType_Time, ARRAY_TIMEPOINTS, config.chunk.t, 1);
    dims[1] =
      DIM("c", ZarrDimensionType_Channel, ARRAY_CHANNELS, config.chunk.c, 1);
    dims[2] =
      DIM("z", ZarrDimensionType_Space, ARRAY_PLANES, config.chunk.z, 1);
    dims[3] = DIM("y",
                  ZarrDimensionType_Space,
                  ARRAY_HEIGHT,
                  config.chunk.y,
                  config.chunks_per_shard_y);
    dims[4] = DIM("x",
                  ZarrDimensionType_Space,
                  ARRAY_WIDTH,
                  config.chunk.x,
                  config.chunks_per_shard_x);

    ZarrStream* stream = ZarrStream_create(&settings);

    // Release the dimension array allocated above; without this every call
    // leaks it. This function already relies on the stream not keeping
    // pointers into `settings` (comp_settings and s3_settings are locals that
    // die on return), so freeing here is consistent with that contract.
    ZarrStreamSettings_destroy_dimension_array(&settings);

    return stream;
}

/**
 * @brief Time how long it takes to append every frame of the benchmark array
 *        to a stream configured by @p config.
 *
 * Appends ARRAY_PLANES * ARRAY_CHANNELS * ARRAY_TIMEPOINTS zero-filled uint16
 * frames of ARRAY_WIDTH x ARRAY_HEIGHT pixels. For filesystem storage the
 * "benchmark.zarr" store is removed afterwards, on failure as well as on
 * success, so a failed run cannot leak state into the next one.
 *
 * @param config Scenario to run.
 * @return Seconds spent in the append loop, or -1.0 if the stream could not be
 *         created or an append failed.
 */
double
run_benchmark(const BenchmarkConfig& config)
{
    // Best-effort removal of the on-disk store. The non-throwing overload is
    // used so a cleanup problem is reported without aborting the whole sweep.
    const auto remove_store = [&config]() {
        if (config.storage != "filesystem") {
            return;
        }
        std::error_code ec;
        fs::remove_all("benchmark.zarr", ec);
        if (ec) {
            std::cerr << "Failed to remove benchmark.zarr: " << ec.message()
                      << "\n";
        }
    };

    auto* stream = setup_stream(config);
    if (!stream) {
        remove_store();
        return -1.0;
    }

    // Widen before multiplying so the byte count is computed in size_t.
    const size_t pixels_per_frame =
      static_cast<size_t>(ARRAY_WIDTH) * ARRAY_HEIGHT;
    const size_t frame_size = pixels_per_frame * sizeof(uint16_t);
    std::vector<uint16_t> frame(pixels_per_frame, 0);
    const unsigned int num_frames =
      ARRAY_PLANES * ARRAY_CHANNELS * ARRAY_TIMEPOINTS;

    Timer timer;
    size_t bytes_out = 0;
    for (unsigned int i = 0; i < num_frames; ++i) {
        if (ZarrStream_append(stream, frame.data(), frame_size, &bytes_out) !=
            ZarrStatusCode_Success) {
            ZarrStream_destroy(stream);
            remove_store();
            return -1.0;
        }
    }
    // NOTE(review): the timer stops before ZarrStream_destroy(); if destroying
    // the stream flushes buffered data, that cost is not part of the reported
    // time -- confirm this is intended.
    const double elapsed = timer.elapsed();

    ZarrStream_destroy(stream);
    remove_store();
    return elapsed;
}

int
main()
{
std::ofstream csv("zarr_benchmarks.csv");
csv << "chunk_size,zarr_version,compression,storage,chunks_per_shard_y,"
"chunks_per_shard_x,run,time_seconds\n";

std::vector<BenchmarkConfig> configs;
for (const auto& chunk : CHUNK_CONFIGS) {

// V2 configurations (no sharding)
for (const auto& compression : { "none", "lz4", "zstd" }) {
configs.push_back({ chunk, 2, compression, "filesystem", 1, 1 });

if (std::getenv("ZARR_S3_ENDPOINT")) {
configs.push_back({ chunk,
2,
compression,
"s3",
1,
1,
std::getenv("ZARR_S3_ENDPOINT"),
std::getenv("ZARR_S3_BUCKET_NAME"),
std::getenv("ZARR_S3_ACCESS_KEY_ID"),
std::getenv("ZARR_S3_SECRET_ACCESS_KEY") });
}
}

unsigned int max_cps_y = (ARRAY_HEIGHT + chunk.y - 1) / chunk.y;
unsigned int max_cps_x = (ARRAY_WIDTH + chunk.x - 1) / chunk.x;

// V3 configurations (with sharding)
for (unsigned int cps_y = 1; cps_y <= max_cps_y; cps_y *= 2) {
for (unsigned int cps_x = 1; cps_x <= max_cps_x; cps_x *= 2) {
for (const auto& compression : { "none", "lz4", "zstd" }) {
configs.push_back(
{ chunk, 3, compression, "filesystem", cps_x, cps_y });

if (std::getenv("ZARR_S3_ENDPOINT")) {
configs.push_back(
{ chunk,
3,
compression,
"s3",
cps_x,
cps_y,
std::getenv("ZARR_S3_ENDPOINT"),
std::getenv("ZARR_S3_BUCKET_NAME"),
std::getenv("ZARR_S3_ACCESS_KEY_ID"),
std::getenv("ZARR_S3_SECRET_ACCESS_KEY") });
}
}
}
}
}

for (const auto& config : configs) {
std::string chunk_str = std::to_string(config.chunk.t) + "x" +
std::to_string(config.chunk.c) + "x" +
std::to_string(config.chunk.z) + "x" +
std::to_string(config.chunk.y) + "x" +
std::to_string(config.chunk.x);

for (unsigned int run = 1; run <= NUM_RUNS; ++run) {
std::cout << "Benchmarking " << chunk_str << " Zarr V"
<< config.zarr_version
<< ", compression: " << config.compression
<< ", storage: " << config.storage
<< ", CPS (y): " << config.chunks_per_shard_y
<< ", CPS (x): " << config.chunks_per_shard_x << ", (run "
<< run << " / " << NUM_RUNS << ")...";
double time = run_benchmark(config);
std::cout << " " << time << "s\n";
if (time >= 0) {
csv << chunk_str << "," << config.zarr_version << ","
<< config.compression << "," << config.storage << ","
<< config.chunks_per_shard_y << ","
<< config.chunks_per_shard_x << "," << run << ","
<< std::fixed << std::setprecision(3) << time << "\n";
}
csv.flush();
}
}

return 0;
}
Loading