---
# haupt.yml - GitHub Actions workflow definition.
# Workflow header: display name, triggers, default shell, and the
# workflow-wide environment shared by every job.
name: GitHub-Hauptaktion

# Triggers: pushes to `main`, every pull request, and a monthly schedule
# (cron "0 3 17 * *": 03:00 on the 17th of each month).
on:
  push:
    branches:
      - 'main'
  pull_request:
  schedule:
    - cron: "0 3 17 * *"

# Every `run:` step uses bash unless the step overrides `shell:`.
defaults:
  run:
    shell: bash

# Workflow-wide switches. The SKIP_* flags are compared as strings in step
# conditions (`env.SKIP_X == 'false'`) and SCALE_FACTOR is interpolated into
# shell commands, so all of them are quoted to keep them strings rather than
# YAML booleans/floats.
env:
  SKIP_HYRISE: "true"
  SKIP_HYRISE_MASTER: "false"
  SKIP_MONETDB: "true"
  SKIP_DUCKDB: "true"
  SCALE_FACTOR: "0.5"
  CMAKE_GENERATOR: Ninja
jobs:
  # Builds the project's Hyrise server and plugins, runs calibration, model
  # learning, encoding selection and the evaluation benchmarks, and uploads
  # the resulting CSV files. All Hyrise-specific steps only run when
  # SKIP_HYRISE is 'false'.
  hyrise_full_pipeline:
    name: Hyrise - Full calibration and evaluation pipeline
    runs-on: ubuntu-24.04
    outputs:
      core_count: ${{ steps.core_client_counts.outputs.core_count }}
      client_count: ${{ steps.core_client_counts.outputs.client_count }}
      calibration_run: ${{ steps.calibration.outputs.calibration_run }}
    steps:
      # Pinned to a release tag instead of the moving `master` ref.
      - uses: actions/checkout@v4
        with:
          submodules: recursive

      - name: Install dependencies for Act setup
        if: ${{ env.ACT }}
        run: |
          sudo apt-get update -y -qq
          sudo apt-get install -y -qq git build-essential cmake python3-pip

      - name: Install dependencies
        run: |
          sudo apt-get update -y -qq
          # Add repository for older python versions. We use 3.11 as there are several issues with 3.12 (e.g., removed distutils and pip problems).
          sudo add-apt-repository ppa:deadsnakes/ppa --yes
          # We don't use Hyrise's install_dependencies script as it includes much more than needed for this small setup here.
          sudo apt-get install -y ninja-build libboost-all-dev postgresql-server-dev-16 libtbb-dev libreadline-dev libsqlite3-dev systemtap-sdt-dev lld numactl python3.11-full python3.11-venv
          python3.11 -m venv ~/venv
          source ~/venv/bin/activate
          python -m pip install -r python/requirements.txt --quiet

      # Derives the client count (75% of the cores, rounded up) and the
      # comparison runtime (SCALE_FACTOR * 3500 s, clamped to [300, 1800]).
      # The venv is not active in this step, so the interpreter is invoked
      # explicitly as `python3`.
      - name: Determine core and client counts for database comparison
        id: core_client_counts
        run: |
          core_count=$(grep -Pc '^processor\t' /proc/cpuinfo)
          client_count=$(python3 -c "import math; print(int(math.ceil(${core_count}*0.75)))")
          comparison_runtime=$(python3 -c "print(min(1800, max(300, int(${{ env.SCALE_FACTOR }}*3500))))")
          echo "Using ${core_count} cores and ${client_count} clients, comparison benchmarks running for ${comparison_runtime} seconds."
          echo "CORE_COUNT=${core_count}" >> $GITHUB_ENV
          echo "CLIENT_COUNT=${client_count}" >> $GITHUB_ENV
          echo "COMPARISON_RUNTIME=${comparison_runtime}" >> $GITHUB_ENV
          echo "core_count=${core_count}" >> $GITHUB_OUTPUT
          echo "client_count=${client_count}" >> $GITHUB_OUTPUT

      - name: Build release server and plugins
        if: env.SKIP_HYRISE == 'false'
        run: |
          mkdir -p encoding_plugin/rel
          pushd encoding_plugin/rel > /dev/null
          # Erase all encoding types. Hurts performance but allows us to compile in release mode with GitHub runners.
          # Further, we use the relaxed mode as there are several issues with newer compiler (fixed in Hyrise's master,
          # but not in the project's code).
          cmake -DCMAKE_BUILD_TYPE=Release -DHYRISE_RELAXED_BUILD=ON -DERASE_SEGMENT_TYPES=Dictionary,LZ4,RunLength,FSST,FrameOfReference,Unencoded,FixedStringDictionary ..
          cmake --build . --target hyriseServer WorkloadStatisticsPlugin WorkloadHandlerPlugin CommandExecutorPlugin DataCharacteristicsPlugin
          popd > /dev/null

      - name: Run calibration - data collection phase (TPC-H only)
        if: env.SKIP_HYRISE == 'false'
        run: |
          # We have a custom CMake target that might not trigger correctly. Since we don't use TPC-DS in this GitHub Action
          # run here, creating an empty directory should be fine.
          rm -rf encoding_plugin/rel/resources || true  # mkdir -p does not work with symlinks. Just get it done.
          mkdir -p encoding_plugin/rel/resources/benchmark/tpcds/tpcds-result-reproduction/query_qualification
          source ~/venv/bin/activate
          pushd python > /dev/null
          python3 runner.py --hyrise_server_path=../encoding_plugin/rel/ --base_benchmark_runs=1 --single_benchmark=TPC-H --execute=calibration --scale_factor ${{ env.SCALE_FACTOR }} --random_encoding_configs_count=3
          popd > /dev/null

      # Picks the most recently modified entry in python/calibration (ignoring
      # names containing 'results') and exports it as both a step output and
      # an environment variable for the following steps.
      - name: Run calibration - learn runtime and size models
        id: calibration
        if: env.SKIP_HYRISE == 'false'
        run: |
          source ~/venv/bin/activate
          pushd python > /dev/null
          calibration_run=`ls -t calibration | grep -v 'results' | head -n1`
          # Run pipeline without selection.
          python3 encoding_selection_pipeline.py --calibration_dir=calibration/${calibration_run} --skip_phases selection
          popd > /dev/null
          echo "calibration_run=${calibration_run}" >> $GITHUB_OUTPUT
          echo "CALIBRATION_RUN=${calibration_run}" >> $GITHUB_ENV

      - name: Run encoding selection
        if: env.SKIP_HYRISE == 'false'
        run: |
          source ~/venv/bin/activate
          pushd python > /dev/null
          # Run selection. For simplicity: use calibration workload as workload to optimize.
          python3 encoding_selection_pipeline.py --calibration_dir=calibration/${{ env.CALIBRATION_RUN }} --use_calibration_as_workload --skip_phases load_csv prepare learn_runtime learn_size --budget_steps_stretch_factor 5.0
          popd > /dev/null

      - name: Benchmark encoding configurations
        if: env.SKIP_HYRISE == 'false'
        run: |
          source ~/venv/bin/activate
          pushd python
          python3 runner.py --hyrise_server_path=../encoding_plugin/rel/ --execute=evaluation --configurations_dir "evaluation/${{ env.CALIBRATION_RUN }}/configurations__default/TPCH" --results_dir "evaluation/${{ env.CALIBRATION_RUN }}/results/TPCH" --scale_factor ${{ env.SCALE_FACTOR }} --single_benchmark=TPCH --port 5551
          popd

      - name: Benchmark non-constrained Hyrise (database comparison)
        if: env.SKIP_HYRISE == 'false'
        run: |
          source ~/venv/bin/activate
          pushd python
          python3 db_comparison_runner.py hyrise --hyrise_server_path=../encoding_plugin/rel/ --cores=${{ env.CORE_COUNT }} --scale_factor=${{ env.SCALE_FACTOR }} --clients=${{ env.CLIENT_COUNT }} --time=${{ env.COMPARISON_RUNTIME }}
          python3 db_comparison_runner.py hyrise --hyrise_server_path=../encoding_plugin/rel/ --cores=${{ env.CORE_COUNT }} --scale_factor=${{ env.SCALE_FACTOR }} --determine_size_only
          popd

      - name: Benchmark budget-constrained Hyrise (database comparison)
        if: env.SKIP_HYRISE == 'false'
        run: |
          source ~/venv/bin/activate
          pushd python
          python3 runner.py --hyrise_server_path=../encoding_plugin/rel/ --execute=evaluation --configurations_dir "evaluation/${{ env.CALIBRATION_RUN }}/configurations__default/TPCH/LPCompressionSelection" --results_dir "evaluation/${{ env.CALIBRATION_RUN }}/results/TPCH/LPCompressionSelection" --scale_factor ${{ env.SCALE_FACTOR }} --single_benchmark=TPCH --port 5551 --cores=${{ env.CORE_COUNT }} --clients=${{ env.CLIENT_COUNT }}
          popd

      - name: Upload benchmark results (non-constrained)
        uses: actions/upload-artifact@v4
        if: env.SKIP_HYRISE == 'false'
        with:
          name: comparison_results_hyrise_non-constrained
          path: |
            python/db_comparison_results/*.csv

      - name: Upload benchmark results (budget-constrained)
        uses: actions/upload-artifact@v4
        if: env.SKIP_HYRISE == 'false'
        with:
          name: comparison_results_hyrise_budget-constrained
          path: |
            python/evaluation/${{ env.CALIBRATION_RUN }}/results/TPCH/*.csv
            python/evaluation/${{ env.CALIBRATION_RUN }}/results/TPCH/LPCompressionSelection/*.csv

  # Benchmarks MonetDB, DuckDB and Hyrise (master); each system is gated by
  # its own SKIP_* flag. Results are uploaded as `comparison_results`.
  database_comparison:
    name: Database Comparison
    runs-on: ubuntu-24.04
    outputs:
      core_count: ${{ steps.core_client_counts.outputs.core_count }}
      client_count: ${{ steps.core_client_counts.outputs.client_count }}
    steps:
      - uses: actions/checkout@v4

      # The additional checkouts fall back to the workflow token when the PAT
      # secret is empty (e.g., pull requests from forks), because an explicitly
      # empty `token` input would make the checkout fail.
      - uses: actions/checkout@v4
        if: env.SKIP_MONETDB == 'false'
        with:
          token: ${{ secrets.PAT || github.token }}
          repository: MonetDB/MonetDB
          ref: 'Dec2023_9'
          path: ./MonetDB

      - uses: actions/checkout@v4
        if: env.SKIP_MONETDB == 'false'
        with:
          token: ${{ secrets.PAT || github.token }}
          repository: MonetDBSolutions/tpch-scripts
          path: ./tpch-scripts

      - uses: actions/checkout@v4
        if: env.SKIP_DUCKDB == 'false'
        with:
          token: ${{ secrets.PAT || github.token }}
          repository: electrum/tpch-dbgen
          path: ./tpch-dbgen

      - uses: actions/checkout@v4
        if: env.SKIP_HYRISE_MASTER == 'false'
        with:
          token: ${{ secrets.PAT || github.token }}
          repository: hyrise/hyrise
          ref: martin/fix/server_listening
          path: ./hyrise_master
          submodules: recursive

      # Same derivation as in the Hyrise job: clients = ceil(0.75 * cores),
      # runtime = SCALE_FACTOR * 3500 s clamped to [300, 1800]. This step runs
      # before the venv exists, so `python3` is called explicitly.
      - name: Determine client and core counts for database comparison
        id: core_client_counts
        run: |
          core_count=$(grep -Pc '^processor\t' /proc/cpuinfo)
          client_count=$(python3 -c "import math; print(int(math.ceil(${core_count}*0.75)))")
          comparison_runtime=$(python3 -c "print(min(1800, max(300, int(${{ env.SCALE_FACTOR }}*3500))))")
          echo "Using ${core_count} cores and ${client_count} clients, comparison benchmarks running for ${comparison_runtime} seconds."
          echo "CORE_COUNT=${core_count}" >> $GITHUB_ENV
          echo "CLIENT_COUNT=${client_count}" >> $GITHUB_ENV
          echo "COMPARISON_RUNTIME=${comparison_runtime}" >> $GITHUB_ENV
          echo "core_count=${core_count}" >> $GITHUB_OUTPUT
          echo "client_count=${client_count}" >> $GITHUB_OUTPUT

      - name: Install dependencies for Act setup
        if: ${{ env.ACT }}
        run: |
          sudo apt-get update -y -qq
          sudo apt-get install -y -qq git build-essential cmake python3-pip

      - name: Install dependencies
        run: |
          sudo apt-get update -y -qq
          sudo apt-get install -y -qq ninja-build libsqlite3-dev postgresql-server-dev-16 numactl bison python3-venv libboost-all-dev libtbb-dev lld libreadline-dev
          python3 -m venv ~/venv
          source ~/venv/bin/activate
          pip3 install -r python/requirements.txt  # Not using --quiet to log the installed DuckDB version.

      # Builds MonetDB in release mode, installs it to ~/monetdb_bin and adds
      # its bin directory to PATH for subsequent steps.
      - name: Setup MonetDB
        if: env.SKIP_MONETDB == 'false'
        run: |
          pushd MonetDB
          mkdir rel
          pushd rel
          cmake -DCMAKE_INSTALL_PREFIX=~/monetdb_bin/ -DASSERT=OFF -DCMAKE_BUILD_TYPE=Release .. 1> /dev/null
          cmake --build . --target install
          echo "${HOME}/monetdb_bin/bin" >> $GITHUB_PATH
          popd
          popd

      - name: Generate TPC-H data set (MonetDB)
        if: env.SKIP_MONETDB == 'false'
        run: |
          # NOTE(review): this creates ./monetdb_farm in the workspace, while the farm passed to tpch_build.sh below
          # is ~/monetdb_farm - confirm which location is intended.
          mkdir -p monetdb_farm
          pushd tpch-scripts
          ./tpch_build.sh -s ${{ env.SCALE_FACTOR }} -f ~/monetdb_farm
          popd

      - name: Benchmark MonetDB (database comparison)
        if: env.SKIP_MONETDB == 'false'
        run: |
          pushd python
          source ~/venv/bin/activate
          python3 db_comparison_runner.py monetdb --cores=${{ env.CORE_COUNT }} --scale_factor=${{ env.SCALE_FACTOR }} --clients=${{ env.CLIENT_COUNT }} --time=${{ env.COMPARISON_RUNTIME }}
          python3 db_comparison_runner.py monetdb --cores=${{ env.CORE_COUNT }} --scale_factor=${{ env.SCALE_FACTOR }} --determine_size_only
          popd

      - name: Generate TPC-H data set (for DuckDB and Umbra)
        if: env.SKIP_DUCKDB == 'false'
        run: |
          pushd tpch-dbgen
          make &> /dev/null
          ./dbgen -s ${{ env.SCALE_FACTOR }} -f
          # Unsure what happens here, but without mode change, DuckDB cannot read nation.tbl as it has mode 111 (seen
          # only with GH action on 24.04).
          chmod 644 *.tbl
          mkdir -p sf${{ env.SCALE_FACTOR }}
          mv *.tbl sf${{ env.SCALE_FACTOR }}
          popd
          mv tpch-dbgen ~

      - name: Benchmark DuckDB (database comparison)
        if: env.SKIP_DUCKDB == 'false'
        run: |
          pushd python
          source ~/venv/bin/activate
          python3 db_comparison_runner.py duckdb --cores=${{ env.CORE_COUNT }} --scale_factor=${{ env.SCALE_FACTOR }} --clients=${{ env.CLIENT_COUNT }} --time=${{ env.COMPARISON_RUNTIME }}
          python3 db_comparison_runner.py duckdb --cores=${{ env.CORE_COUNT }} --scale_factor=${{ env.SCALE_FACTOR }} --determine_size_only
          popd

      - name: Build Hyrise (master) release server
        if: env.SKIP_HYRISE_MASTER == 'false'
        run: |
          mkdir -p hyrise_master/rel
          pushd hyrise_master/rel > /dev/null
          # Erase all encoding types. Hurts performance but allows us to compile in release mode with GitHub runners.
          cmake -DCMAKE_BUILD_TYPE=Release -DHYRISE_RELAXED_BUILD=ON -DERASE_SEGMENT_TYPES=Dictionary,LZ4,RunLength,FSST,FrameOfReference,Unencoded,FixedStringDictionary ..
          cmake --build . --target hyriseServer
          popd > /dev/null

      # The result files are renamed afterwards so that they carry a
      # `hyrise_master` name instead of the generic `hyrise` one.
      - name: Benchmark Hyrise (master, database comparison)
        if: env.SKIP_HYRISE_MASTER == 'false'
        run: |
          pushd python
          source ~/venv/bin/activate
          python3 db_comparison_runner.py hyrise --hyrise_server_path=../hyrise_master/rel/ --cores=${{ env.CORE_COUNT }} --scale_factor=${{ env.SCALE_FACTOR }} --clients=${{ env.CLIENT_COUNT }} --time=301
          python3 db_comparison_runner.py hyrise --hyrise_server_path=../hyrise_master/rel/ --cores=${{ env.CORE_COUNT }} --scale_factor=${{ env.SCALE_FACTOR }} --determine_size_only
          mv db_comparison_results/database_comparison__TPC-H__hyrise.csv db_comparison_results/database_comparison__TPC-H__hyrise_master.csv
          mv db_comparison_results/size_hyrise__SF${{env.SCALE_FACTOR}}.csv db_comparison_results/size_hyrise_master__SF${{env.SCALE_FACTOR}}.csv
          popd

      - name: Upload benchmark results
        uses: actions/upload-artifact@v4
        if: env.SKIP_DUCKDB == 'false' || env.SKIP_MONETDB == 'false' || env.SKIP_HYRISE_MASTER == 'false'
        with:
          name: comparison_results
          path: |
            python/db_comparison_results/*.csv

  # Collects the result artifacts of both benchmark jobs and runs R/plot.R.
  plotting:
    needs: [hyrise_full_pipeline, database_comparison]
    name: Plotting
    runs-on: ubuntu-24.04
    steps:
      - uses: actions/checkout@v4
      - uses: r-lib/actions/setup-r@v2

      # Each download carries the same condition as the matching upload step.
      # An artifact that was never uploaded (because its benchmarks were
      # skipped) would otherwise make the download, and thus the job, fail.
      - uses: actions/download-artifact@v4
        if: env.SKIP_DUCKDB == 'false' || env.SKIP_MONETDB == 'false' || env.SKIP_HYRISE_MASTER == 'false'
        with:
          name: comparison_results
          path: results_to_plot

      - uses: actions/download-artifact@v4
        if: env.SKIP_HYRISE == 'false'
        with:
          name: comparison_results_hyrise_non-constrained
          path: results_to_plot

      - uses: actions/download-artifact@v4
        if: env.SKIP_HYRISE == 'false'
        with:
          name: comparison_results_hyrise_budget-constrained
          path: results_to_plot

      # Exposes the outputs of the two benchmark jobs as environment
      # variables for the following steps and lists the downloaded files.
      - name: Set environment variables
        run: |
          echo "HYRISE_CORE_COUNT=${{ needs.hyrise_full_pipeline.outputs.core_count }}" >> $GITHUB_ENV
          echo "HYRISE_CLIENT_COUNT=${{ needs.hyrise_full_pipeline.outputs.client_count }}" >> $GITHUB_ENV
          echo "CALIBRATION_RUN=${{ needs.hyrise_full_pipeline.outputs.calibration_run }}" >> $GITHUB_ENV
          echo "COMPARISON_CORE_COUNT=${{ needs.database_comparison.outputs.core_count }}" >> $GITHUB_ENV
          echo "COMPARISON_CLIENT_COUNT=${{ needs.database_comparison.outputs.client_count }}" >> $GITHUB_ENV
          ls -lisa results_to_plot

      # Install R packages (install action did not work with act)
      - name: Install dependencies
        run: |
          install.packages(c(
            "dplyr",
            "ggplot2",
            "ggrepel"
          ))
        shell: Rscript {0}

      - name: Plot
        run: |
          source("R/plot.R")
        shell: Rscript {0}

      - name: Upload database comparison plot
        uses: actions/upload-artifact@v4
        with:
          name: database_comparison
          path: |
            db_comparison.pdf