-
-
Notifications
You must be signed in to change notification settings - Fork 1
/
benchmark.py
90 lines (81 loc) · 3 KB
/
benchmark.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
import os
import sys
import psutil
from datetime import datetime
from contextlib import contextmanager
# Import Everything for equal memory conditions
import duckdb
import chdb
from chdb import session as chs
import glaredb
from databend import SessionContext
import datafusion
DBNAME = os.getenv('DBNAME', '*')
ITERATIONS = int(os.getenv('ITERATIONS', 3))
BENCHMARKS = ["version", "count", "groupby"]
@contextmanager
def suppress_stdout():
with open(os.devnull, "w") as devnull:
old_stdout = sys.stdout
sys.stdout = devnull
try:
yield
finally:
sys.stdout = old_stdout
def get_memory_usage():
""" Returns the current memory usage of the Python process. """
process = psutil.Process(os.getpid())
return process.memory_info().rss / (1024 * 1024) # Convert bytes to megabytes
def load_query(db: str, name: str) -> str:
""" Load SQL query from file. """
try:
with open(f"queries/{name}.{db}.sql") as f:
return f.read()
except FileNotFoundError:
print(f"Query file for {name} not found.")
sys.exit(1)
def benchmark_db(db: str, execute_fn):
""" Benchmarks all queries against one datastore """
for name in BENCHMARKS:
query = load_query(db, name)
deltas = []
mem_usage_before = get_memory_usage()
for _ in range(ITERATIONS):
start = datetime.now()
with suppress_stdout():
try:
results = execute_fn(query)
except Exception as e:
print(f"Error executing query on {db}: {e}")
continue
end = datetime.now()
deltas.append((end - start).total_seconds())
mem_usage_after = get_memory_usage()
if deltas:
avg = sum(deltas) / len(deltas)
mem_used = mem_usage_after - mem_usage_before
print(f"{db}:{name}: avg={avg:.3f}s min={min(deltas):.3f}s max={max(deltas):.3f}s ({ITERATIONS} runs) | Memory used: {mem_used:.2f} MB")
def main():
match DBNAME:
case "chdb":
print("Testing chdb " + str(chdb.engine_version))
chdbs = chs.Session()
benchmark_db("chdb", lambda query: chdb.query(query))
case "duckdb":
print("Testing duckdb " + str(duckdb.__version__))
ddb = duckdb.connect()
benchmark_db("duckdb", lambda query: ddb.execute(query))
case "glaredb":
print("Testing glaredb")
gdb = glaredb.connect()
benchmark_db("glaredb", lambda query: gdb.sql(query).show())
case "databend":
print("Testing databend")
databendx = SessionContext()
benchmark_db("databend", lambda query: databendx.sql(query).collect())
case "datafusion":
print("Testing datafusion")
datafusionx = datafusion.SessionContext()
benchmark_db("datafusion", lambda query: datafusionx.sql(query).collect())
if __name__ == "__main__":
main()