# haupt.yml
# Workflow header: triggers, default shell, and workflow-wide environment
# variables shared by all jobs.
name: GitHub-Hauptaktion

on:
  push:
  pull_request:
  schedule:
    # Minute 0, hour 3, day-of-month 17, every month (GitHub evaluates cron in UTC).
    - cron: "0 3 17 * *"

defaults:
  run:
    shell: bash

env:
  # Quoted on purpose: the step conditions compare the SKIP_* flags against the
  # string 'false', and SCALE_FACTOR is spliced verbatim into commands and
  # directory names, so none of these should be typed as YAML bool/float.
  SKIP_HYRISE: "false"
  SKIP_MONETDB: "false"
  SKIP_DUCKDB: "false"
  SCALE_FACTOR: "0.5"
  CMAKE_GENERATOR: Ninja

jobs:
# Job: build Hyrise, run calibration and encoding selection, then benchmark (TPC-H).
  hyrise_full_pipeline:
    name: Hyrise - Full calibration and evaluation pipeline
    runs-on: ubuntu-24.04
    # Read by the plotting job through `needs.hyrise_full_pipeline.outputs.*`.
    outputs:
      core_count: ${{ steps.core_client_counts.outputs.core_count }}
      client_count: ${{ steps.core_client_counts.outputs.client_count }}
      calibration_run: ${{ steps.calibration.outputs.calibration_run }}
    steps:
      # Pinned to a release tag instead of a floating branch ref.
      - uses: actions/checkout@v4
        with:
          submodules: recursive

      # Only runs when the ACT environment variable is set.
      - name: Install dependencies for Act setup
        if: ${{ env.ACT }}
        run: |
          sudo apt-get update -y -qq
          sudo apt-get install -y -qq git build-essential cmake python3-pip

      # Installs build dependencies and creates the Python 3.11 venv (~/venv)
      # that later steps activate.
      - name: Install dependencies
        run: |
          sudo apt-get update -y -qq
          sudo add-apt-repository ppa:deadsnakes/ppa --yes
          sudo apt search postgresql-server-dev
          # We don't use Hyrise's install_dependencies script as it includes much more than needed for this small setup here.
          sudo apt-get install -y ninja-build libboost-all-dev postgresql-server-dev-16 libtbb-dev libreadline-dev libsqlite3-dev systemtap-sdt-dev lld numactl python3.11-full python3.11-venv
          python3.11 -m venv ~/venv
          source ~/venv/bin/activate
          #python -m ensurepip --upgrade
          #python -m pip install --upgrade pip
          #python -m pip install --upgrade setuptools
          #pip3 install setuptools # --quiet ... needed for latest Python version, re-check later
          python -m pip install -r python/requirements.txt # --quiet

      # Derives the core count from /proc/cpuinfo, the client count as
      # ceil(0.75 * cores), and a benchmark runtime clamped to [300, 1800] seconds.
      # Uses `python3` explicitly: the venv is not activated in this step, so a
      # bare `python` is only present if the runner image happens to provide it.
      - name: Determine core and client counts for database comparison
        id: core_client_counts
        run: |
          core_count=$(grep -Pc '^processor\t' /proc/cpuinfo)
          client_count=$(python3 -c "import math; print(int(math.ceil(${core_count}*0.75)))")
          comparison_runtime=$(python3 -c "print(min(1800, max(300, int(${{ env.SCALE_FACTOR }}*3500))))")
          echo "Using ${core_count} cores and ${client_count} clients, comparison benchmarks running for ${comparison_runtime} seconds."
          echo "CORE_COUNT=${core_count}" >> "$GITHUB_ENV"
          echo "CLIENT_COUNT=${client_count}" >> "$GITHUB_ENV"
          echo "COMPARISON_RUNTIME=${comparison_runtime}" >> "$GITHUB_ENV"
          echo "core_count=${core_count}" >> "$GITHUB_OUTPUT"
          echo "client_count=${client_count}" >> "$GITHUB_OUTPUT"

      - name: Build release server and plugins
        if: env.SKIP_HYRISE == 'false'
        run: |
          mkdir -p encoding_plugin/rel
          pushd encoding_plugin/rel > /dev/null
          # Erase all encoding types. Hurts performance but allows us to compile in release mode with GitHub runners.
          cmake -DCMAKE_BUILD_TYPE=Release -DHYRISE_RELAXED_BUILD=ON -DERASE_SEGMENT_TYPES=Dictionary,LZ4,RunLength,FSST,FrameOfReference,Unencoded,FixedStringDictionary ..
          cmake --build . --target hyriseServer WorkloadStatisticsPlugin WorkloadHandlerPlugin CommandExecutorPlugin DataCharacteristicsPlugin
          popd > /dev/null

      - name: Run calibration - data collection phase (TPC-H only)
        if: env.SKIP_HYRISE == 'false'
        run: |
          # We have a custom CMake target that might not trigger correctly. Since we don't use TPC-DS in this GitHub Action
          # run here, creating an empty directory should be fine.
          rm -rf encoding_plugin/rel/resources || true # mkdir -p does not work with symlinks. Just get it done.
          mkdir -p encoding_plugin/rel/resources/benchmark/tpcds/tpcds-result-reproduction/query_qualification
          source ~/venv/bin/activate
          pushd python > /dev/null
          python3 runner.py --hyrise_server_path=../encoding_plugin/rel/ --base_benchmark_runs=1 --single_benchmark=TPC-H --execute=calibration --scale_factor ${{ env.SCALE_FACTOR }} --random_encoding_configs_count=3
          popd > /dev/null

      # Picks the most recently modified entry in python/calibration (ignoring
      # names containing 'results') and publishes it as both a step output and
      # an environment variable for the following steps.
      - name: Run calibration - learn runtime and size models
        id: calibration
        if: env.SKIP_HYRISE == 'false'
        run: |
          source ~/venv/bin/activate
          pushd python > /dev/null
          calibration_run=$(ls -t calibration | grep -v 'results' | head -n1)
          # Run pipeline without selection.
          python3 encoding_selection_pipeline.py --calibration_dir=calibration/${calibration_run} --skip_phases selection
          popd > /dev/null
          echo "calibration_run=${calibration_run}" >> "$GITHUB_OUTPUT"
          echo "CALIBRATION_RUN=${calibration_run}" >> "$GITHUB_ENV"

      - name: Run encoding selection
        if: env.SKIP_HYRISE == 'false'
        run: |
          source ~/venv/bin/activate
          pushd python > /dev/null
          # Run selection. For simplicity: use calibration workload as workload to optimize.
          python3 encoding_selection_pipeline.py --calibration_dir=calibration/${{ env.CALIBRATION_RUN }} --use_calibration_as_workload --skip_phases load_csv prepare learn_runtime learn_size --budget_steps_stretch_factor 5.0
          popd > /dev/null

      - name: Benchmark encoding configurations
        if: env.SKIP_HYRISE == 'false'
        run: |
          source ~/venv/bin/activate
          pushd python
          python3 runner.py --hyrise_server_path=../encoding_plugin/rel/ --execute=evaluation --configurations_dir "evaluation/${{ env.CALIBRATION_RUN }}/configurations__default/TPCH" --results_dir "evaluation/${{ env.CALIBRATION_RUN }}/results/TPCH" --scale_factor ${{ env.SCALE_FACTOR }} --single_benchmark=TPCH --port 5551
          popd

      # First invocation runs the timed benchmark; the second only determines the size.
      - name: Benchmark non-constrained Hyrise (database comparison)
        if: env.SKIP_HYRISE == 'false'
        run: |
          source ~/venv/bin/activate
          pushd python
          python3 db_comparison_runner.py hyrise --hyrise_server_path=../encoding_plugin/rel/ --cores=${{ env.CORE_COUNT }} --scale_factor=${{ env.SCALE_FACTOR }} --clients=${{ env.CLIENT_COUNT }} --time=${{ env.COMPARISON_RUNTIME }}
          python3 db_comparison_runner.py hyrise --hyrise_server_path=../encoding_plugin/rel/ --cores=${{ env.CORE_COUNT }} --scale_factor=${{ env.SCALE_FACTOR }} --determine_size_only
          popd

      - name: Benchmark budget-constrained Hyrise (database comparison)
        if: env.SKIP_HYRISE == 'false'
        run: |
          source ~/venv/bin/activate
          pushd python
          python3 runner.py --hyrise_server_path=../encoding_plugin/rel/ --execute=evaluation --configurations_dir "evaluation/${{ env.CALIBRATION_RUN }}/configurations__default/TPCH/LPCompressionSelection" --results_dir "evaluation/${{ env.CALIBRATION_RUN }}/results/TPCH/LPCompressionSelection" --scale_factor ${{ env.SCALE_FACTOR }} --single_benchmark=TPCH --port 5551 --cores=${{ env.CORE_COUNT }} --clients=${{ env.CLIENT_COUNT }}
          popd

      - name: Upload benchmark results (non-constrained)
        if: env.SKIP_HYRISE == 'false'
        uses: actions/upload-artifact@v4
        with:
          name: comparison_results_hyrise_non-constrained
          path: |
            python/db_comparison_results/*.csv

      - name: Upload benchmark results (budget-constrained)
        if: env.SKIP_HYRISE == 'false'
        uses: actions/upload-artifact@v4
        with:
          name: comparison_results_hyrise_budget-constrained
          path: |
            python/evaluation/${{ env.CALIBRATION_RUN }}/results/TPCH/*.csv
            python/evaluation/${{ env.CALIBRATION_RUN }}/results/TPCH/LPCompressionSelection/*.csv
# Job: build MonetDB, generate TPC-H data, and benchmark MonetDB and DuckDB.
  database_comparison:
    name: Database Comparison
    runs-on: ubuntu-24.04
    # Read by the plotting job through `needs.database_comparison.outputs.*`.
    outputs:
      core_count: ${{ steps.core_client_counts.outputs.core_count }}
      client_count: ${{ steps.core_client_counts.outputs.client_count }}
    steps:
      # All checkouts are pinned to a release tag instead of a floating branch ref.
      - uses: actions/checkout@v4

      - uses: actions/checkout@v4
        if: env.SKIP_MONETDB == 'false'
        with:
          token: ${{ secrets.PAT }}
          repository: MonetDB/MonetDB
          ref: 'Sep2022_7'  # checking out the latest tag as the current master does not compile with GCC 11 (as of 2022-11-17)
          path: ./MonetDB

      - uses: actions/checkout@v4
        if: env.SKIP_MONETDB == 'false'
        with:
          token: ${{ secrets.PAT }}
          repository: MonetDBSolutions/tpch-scripts
          path: ./tpch-scripts

      - uses: actions/checkout@v4
        if: env.SKIP_DUCKDB == 'false'
        with:
          token: ${{ secrets.PAT }}
          repository: electrum/tpch-dbgen
          path: ./tpch-dbgen

      # Derives the core count from /proc/cpuinfo, the client count as
      # ceil(0.75 * cores), and a benchmark runtime clamped to [300, 1800] seconds.
      # Uses `python3` explicitly: this step runs before any dependency
      # installation, so a bare `python` is only present if the image provides it.
      - name: Determine client and core counts for database comparison
        id: core_client_counts
        run: |
          core_count=$(grep -Pc '^processor\t' /proc/cpuinfo)
          client_count=$(python3 -c "import math; print(int(math.ceil(${core_count}*0.75)))")
          comparison_runtime=$(python3 -c "print(min(1800, max(300, int(${{ env.SCALE_FACTOR }}*3500))))")
          echo "Using ${core_count} cores and ${client_count} clients, comparison benchmarks running for ${comparison_runtime} seconds."
          echo "CORE_COUNT=${core_count}" >> "$GITHUB_ENV"
          echo "CLIENT_COUNT=${client_count}" >> "$GITHUB_ENV"
          echo "COMPARISON_RUNTIME=${comparison_runtime}" >> "$GITHUB_ENV"
          echo "core_count=${core_count}" >> "$GITHUB_OUTPUT"
          echo "client_count=${client_count}" >> "$GITHUB_OUTPUT"

      # Only runs when the ACT environment variable is set.
      - name: Install dependencies for Act setup
        if: ${{ env.ACT }}
        run: |
          sudo apt-get update -y -qq
          sudo apt-get install -y -qq git build-essential cmake python3-pip

      # DEBIAN_FRONTEND is set on the apt-get side of sudo; placed in front of
      # `sudo` it is normally dropped when sudo resets the environment.
      - name: Install dependencies
        run: |
          sudo apt-get update -y -qq
          sudo DEBIAN_FRONTEND=noninteractive apt-get install -y -qq ninja-build libsqlite3-dev postgresql-server-dev-16 numactl bison python3-venv
          python3 -m venv ~/venv
          source ~/venv/bin/activate
          pip3 install -r python/requirements.txt #--quiet

      # Builds MonetDB into ~/monetdb_bin and adds its bin directory to PATH
      # for the following steps.
      - name: Setup MonetDB
        if: env.SKIP_MONETDB == 'false'
        run: |
          pushd MonetDB
          mkdir -p rel
          pushd rel
          cmake -DCMAKE_INSTALL_PREFIX=~/monetdb_bin/ -DASSERT=OFF -DCMAKE_BUILD_TYPE=Release .. 1> /dev/null
          cmake --build . --target install
          echo "${HOME}/monetdb_bin/bin" >> "$GITHUB_PATH"
          popd
          popd

      # NOTE(review): the mkdir below creates ./monetdb_farm in the workspace,
      # while tpch_build.sh is pointed at ~/monetdb_farm -- confirm which
      # location is intended before changing either.
      - name: Generate TPC-H data set (MonetDB)
        if: env.SKIP_MONETDB == 'false'
        run: |
          mkdir -p monetdb_farm
          pushd tpch-scripts
          ./tpch_build.sh -s ${{ env.SCALE_FACTOR }} -f ~/monetdb_farm
          popd

      # First invocation runs the timed benchmark; the second only determines the size.
      - name: Benchmark MonetDB (database comparison)
        if: env.SKIP_MONETDB == 'false'
        run: |
          pushd python
          source ~/venv/bin/activate
          python3 db_comparison_runner.py monetdb --cores=${{ env.CORE_COUNT }} --scale_factor=${{ env.SCALE_FACTOR }} --clients=${{ env.CLIENT_COUNT }} --time=${{ env.COMPARISON_RUNTIME }}
          python3 db_comparison_runner.py monetdb --cores=${{ env.CORE_COUNT }} --scale_factor=${{ env.SCALE_FACTOR }} --determine_size_only
          popd

      # Generates the .tbl files, collects them in sf<SCALE_FACTOR>, and moves
      # the whole tpch-dbgen directory into the home directory.
      - name: Generate TPC-H data set (for DuckDB and Umbra)
        if: env.SKIP_DUCKDB == 'false'
        run: |
          pushd tpch-dbgen
          make &> /dev/null
          ./dbgen -s ${{ env.SCALE_FACTOR }} -f
          mkdir -p sf${{ env.SCALE_FACTOR }}
          mv *.tbl sf${{ env.SCALE_FACTOR }}
          popd
          mv tpch-dbgen ~

      # The directory listings follow SCALE_FACTOR and the actual home directory
      # instead of hard-coding /home/runner and sf0.5, which made the step fail
      # as soon as the scale factor was changed.
      - name: Benchmark DuckDB (database comparison)
        if: env.SKIP_DUCKDB == 'false'
        run: |
          pushd python
          ls -lisa ~/tpch-dbgen
          ls -lisa ~/tpch-dbgen/sf${{ env.SCALE_FACTOR }}
          source ~/venv/bin/activate
          python3 db_comparison_runner.py duckdb --cores=${{ env.CORE_COUNT }} --scale_factor=${{ env.SCALE_FACTOR }} --clients=${{ env.CLIENT_COUNT }} --time=${{ env.COMPARISON_RUNTIME }}
          python3 db_comparison_runner.py duckdb --cores=${{ env.CORE_COUNT }} --scale_factor=${{ env.SCALE_FACTOR }} --determine_size_only
          popd

      - name: Upload benchmark results
        if: env.SKIP_DUCKDB == 'false' || env.SKIP_MONETDB == 'false'
        uses: actions/upload-artifact@v4
        with:
          name: comparison_results
          path: |
            python/db_comparison_results/*.csv
# Job: download the benchmark artifacts of both upstream jobs and plot them with R.
  plotting:
    needs: [hyrise_full_pipeline, database_comparison]
    name: Plotting
    runs-on: ubuntu-24.04
    steps:
      # Actions are pinned to release tags instead of floating branch refs.
      - uses: actions/checkout@v4
      - uses: r-lib/actions/setup-r@v2

      # All three artifacts are extracted into the same directory.
      - uses: actions/download-artifact@v4
        with:
          name: comparison_results
          path: results_to_plot
      - uses: actions/download-artifact@v4
        with:
          name: comparison_results_hyrise_non-constrained
          path: results_to_plot
      - uses: actions/download-artifact@v4
        with:
          name: comparison_results_hyrise_budget-constrained
          path: results_to_plot

      # Re-exports the outputs of the upstream jobs as environment variables
      # for the remaining steps of this job.
      - name: Set environment variables
        run: |
          echo "HYRISE_CORE_COUNT=${{ needs.hyrise_full_pipeline.outputs.core_count }}" >> "$GITHUB_ENV"
          echo "HYRISE_CLIENT_COUNT=${{ needs.hyrise_full_pipeline.outputs.client_count }}" >> "$GITHUB_ENV"
          echo "CALIBRATION_RUN=${{ needs.hyrise_full_pipeline.outputs.calibration_run }}" >> "$GITHUB_ENV"
          echo "COMPARISON_CORE_COUNT=${{ needs.database_comparison.outputs.core_count }}" >> "$GITHUB_ENV"
          echo "COMPARISON_CLIENT_COUNT=${{ needs.database_comparison.outputs.client_count }}" >> "$GITHUB_ENV"

      # Install R packages (install action did not work with act)
      - name: Install dependencies
        shell: Rscript {0}
        run: |
          install.packages(c(
          "dplyr",
          "ggplot2",
          "ggrepel"
          ))

      - name: Plot
        shell: Rscript {0}
        run: |
          source("R/plot.R")

      - name: Upload database comparison plot
        uses: actions/upload-artifact@v4
        with:
          name: database_comparison
          path: |
            db_comparison.pdf