Skip to content

Commit

Permalink
[Chore] Add extra logging for experiments (#19)
Browse files Browse the repository at this point in the history
* [Chore] Add logging for RocksDB
* [Jupyter] Update jupyter notebooks
  • Loading branch information
ephoris authored May 6, 2024
1 parent a7570d0 commit 1ca540c
Show file tree
Hide file tree
Showing 10 changed files with 598 additions and 311 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -200,3 +200,4 @@ data/*.data
cmake-build-debug/
src/bliss/.idea/

db_working_home
818 changes: 527 additions & 291 deletions notebook/heatmaps.ipynb

Large diffs are not rendered by default.

26 changes: 12 additions & 14 deletions script/bench.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,7 @@
from infra.pybliss import BlissArgs, PyBliss
from infra.util import get_file_params

K_CHOICES = [1, 3, 5, 10, 25, 50]
L_CHOICES = [1, 3, 5, 10, 25, 50]
SPECIAL_KL = ((0, 0), (100, 100))
INDEXES = ["alex", "lipp"]
INDEXES = ["btree"]
PRELOAD_FACTOR = 0.4
WRITE_FACTOR = 0.4
READ_FACTOR = 0.2
Expand All @@ -22,21 +19,22 @@ def main(args):
bliss = PyBliss(args.bliss, args.smoke_test)
db = BlissDB(args.result_db)
files = os.listdir(args.data_folder)
exp_pairs = ((file, index) for file in files for index in INDEXES)
kwargs = {
"preload_factor": PRELOAD_FACTOR,
"write_factor": WRITE_FACTOR,
"read_factor": READ_FACTOR,
"mixed_ratio": MIXED_RATIO,
"file_type": "binary",
"seed": 0,
"use_preload": PRELOAD,
}

exp_pairs = ((file, index) for file in files for index in INDEXES)
for file, index in exp_pairs:
_, k_pt, l_pt = get_file_params(file)
logging.info(f"Running bliss ({index}, {file})")
bliss_args = BlissArgs(
data_file=os.path.join(args.data_folder, file),
index_type=index,
preload_factor=PRELOAD_FACTOR,
write_factor=WRITE_FACTOR,
read_factor=READ_FACTOR,
mixed_ratio=MIXED_RATIO,
file_type="binary",
seed=0,
use_preload=PRELOAD,
data_file=os.path.join(args.data_folder, file), index_type=index, **kwargs
)
logging.debug(f"BlissArgs: {bliss_args}")
stats = bliss.run_single_bliss_bench(bliss_args)
Expand Down
10 changes: 7 additions & 3 deletions script/data_gen.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,12 @@
import argparse
from infra.pybods import PyBods, BodsArgs

K_CHOICES = [1, 3, 5, 10, 25, 50, 100]
L_CHOICES = [1, 3, 5, 10, 25, 50, 100]
SPECIAL_KL = [(0, 0), (100, 100)]
# K_CHOICES = [1, 3, 5, 10, 25, 50, 100]
# L_CHOICES = [1, 3, 5, 10, 25, 50, 100]
# SPECIAL_KL = [(0, 0), (100, 100)]
K_CHOICES = [3, 5, 10]
L_CHOICES = [10]
SPECIAL_KL = []

NUM_KEYS = 500_000_000
BINARY_FILE_FORMAT = True
Expand All @@ -28,6 +31,7 @@ def main(args: argparse.Namespace):
k_pt=k_pt,
l_pt=l_pt,
binary_file_format=BINARY_FILE_FORMAT,
seed=2169,
)
results = bods.gen_data(bods_args)
print(results)
Expand Down
7 changes: 7 additions & 0 deletions script/infra/pybliss.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from dataclasses import dataclass
import re
import os
import logging
import subprocess
import random
Expand Down Expand Up @@ -92,6 +93,12 @@ def run_single_bliss_bench(self, args: BlissArgs) -> BlissStats:
read_time = self.read_time_regex.search(proc_results)
read_time = int(read_time.group(1)) if read_time else 0

os.makedirs("./run_logs", exist_ok=True)
_, file_name = os.path.split(args.data_file)
file_name, _ = os.path.splitext(file_name + f"_{args.index_type}")
with open(os.path.join("./run_logs", file_name + ".log"), "w") as fid:
fid.write(proc_results)

return BlissStats(
preload_time=preload_time,
preload_creation_time=preload_creation,
Expand Down
34 changes: 34 additions & 0 deletions src/bliss/bench_alex.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
#include <vector>

#include "bliss/bliss_index.h"
#include "spdlog/spdlog.h"

namespace bliss {

Expand All @@ -28,6 +29,39 @@ class BlissAlexIndex : public BlissIndex<KEY_TYPE, VALUE_TYPE> {
// Inserts the (key, value) pair by forwarding both arguments to
// `_index.insert`; whatever that call returns is not used.
void put(KEY_TYPE key, VALUE_TYPE value) override {
_index.insert(key, value);
}

/// Logs the index's internal counters and derived parameters.
///
/// Emits one `spdlog::info` line per field read from `_index.stats_`,
/// followed by two values read from `_index.derived_params_`. Each line has
/// the form `<field_name> = <value>`, with the label matching the field name.
void end_routine() override {
  // The stats are only read here, so bind by const reference instead of
  // copying the whole struct.
  const auto &stats = this->_index.stats_;

  spdlog::info("num_keys = {}", stats.num_keys);
  spdlog::info("num_model_nodes = {}", stats.num_model_nodes);
  spdlog::info("num_data_nodes = {}", stats.num_data_nodes);
  spdlog::info("num_expand_and_scales = {}", stats.num_expand_and_scales);
  spdlog::info("num_expand_and_retrains = {}",
               stats.num_expand_and_retrains);
  spdlog::info("num_downward_splits = {}", stats.num_downward_splits);
  spdlog::info("num_sideways_splits = {}", stats.num_sideways_splits);
  spdlog::info("num_model_node_expansions = {}",
               stats.num_model_node_expansions);
  spdlog::info("num_model_node_splits = {}", stats.num_model_node_splits);
  // Label spelled to match the field name so log consumers can key on it.
  spdlog::info("num_downward_split_keys = {}",
               stats.num_downward_split_keys);
  spdlog::info("num_sideways_split_keys = {}",
               stats.num_sideways_split_keys);
  spdlog::info("num_model_node_expansion_pointers = {}",
               stats.num_model_node_expansion_pointers);
  spdlog::info("num_model_node_split_pointers = {}",
               stats.num_model_node_split_pointers);
  spdlog::info("num_node_lookups = {}", stats.num_node_lookups);
  spdlog::info("num_lookups = {}", stats.num_lookups);
  spdlog::info("num_inserts = {}", stats.num_inserts);
  spdlog::info("splitting_time = {}", stats.splitting_time);
  spdlog::info("cost_computation_time = {}", stats.cost_computation_time);

  spdlog::info("max_fanout = {}",
               this->_index.derived_params_.max_fanout);
  spdlog::info("max_data_node_slots = {}",
               this->_index.derived_params_.max_data_node_slots);
}
};

} // namespace bliss
Expand Down
8 changes: 5 additions & 3 deletions src/bliss/bench_btree.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,16 @@ class BlissBTreeIndex : public BlissIndex<KEY_TYPE, VALUE_TYPE> {
void bulkload(
std::vector<std::pair<KEY_TYPE, VALUE_TYPE>> values) override {
// expects the pairs to be pre-sorted before performing bulk load
_index.bulk_load(values.begin(), values.end());
this->_index.bulk_load(values.begin(), values.end());
}

bool get(KEY_TYPE key) override { return _index.exists(key); }
bool get(KEY_TYPE key) override { return this->_index.exists(key); }

void put(KEY_TYPE key, VALUE_TYPE value) override {
_index.insert(std::make_pair(key, value));
this->_index.insert(std::make_pair(key, value));
}

// Empty override: this index logs nothing at the end of a run.
void end_routine() override {}
};

} // namespace bliss
Expand Down
2 changes: 2 additions & 0 deletions src/bliss/bench_lipp.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@ class BlissLippIndex : public BlissIndex<KEY_TYPE, VALUE_TYPE> {
// Inserts the (key, value) pair by forwarding both arguments to
// `_index.insert`; whatever that call returns is not used.
void put(KEY_TYPE key, VALUE_TYPE value) override {
_index.insert(key, value);
}

// Empty override: this index logs nothing at the end of a run.
void end_routine() override {}
};

} // namespace bliss
Expand Down
1 change: 1 addition & 0 deletions src/bliss/bliss_index.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ class BlissIndex {
// Loads a batch of key/value pairs; the vector is taken by value.
// NOTE(review): unlike the three members below, this one is not pure
// virtual — confirm a definition exists elsewhere.
virtual void bulkload(std::vector<std::pair<KEY_TYPE, VALUE_TYPE>> values);
// Looks up `key` and returns a bool result (pure virtual).
virtual bool get(KEY_TYPE key) = 0;
// Stores `value` under `key` (pure virtual).
virtual void put(KEY_TYPE key, VALUE_TYPE value) = 0;
// Hook that every concrete index must implement (pure virtual).
virtual void end_routine() = 0;
};

} // namespace bliss
Expand Down
2 changes: 2 additions & 0 deletions src/bliss_bench.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -270,5 +270,7 @@ int main(int argc, char *argv[]) {

workload_executor(*index, data, config, 0);

index->end_routine();

return 0;
}

0 comments on commit 1ca540c

Please sign in to comment.