Skip to content

benchmark-merge

benchmark-merge #647

Workflow file for this run

# Display name of the workflow.
name: Merge Benchmarks

# Runs only for `repository_dispatch` events whose type is `benchmark-merge`.
on:
  repository_dispatch:
    types:
      - benchmark-merge
# Workflow-level environment variables; the steps read them via the `env` context.
env:
  # Directory holding the data.py and setup.sh helper scripts.
  SCRIPT_DIR: '.github/scripts/merge-perf'
  # SQL table that the benchmark results are written to and queried from.
  RESULT_TABLE_NAME: 'merge_perf_results'
  # Database that the Report step clones and pushes results to.
  DOLTHUB_DB: 'import-perf/merge-perf'
jobs:
  # Single job: installs dolt from the requested ref, times `dolt merge --squash`
  # for four edit scenarios, pushes the timings to the DOLTHUB_DB database, and
  # then optionally e-mails the results and/or posts them as an issue comment.
  #
  # Fields read from the dispatch payload (github.event.client_payload):
  #   version          - ref to check out; also used in the e-mail subject
  #   commit_to_branch - branch of the results database to commit to
  #   email_recipient  - when non-empty, the e-mail steps run
  #   issue_id         - when non-empty, the comment steps run
  #   actor            - user mentioned in the comment
  bench:
    name: Benchmark
    defaults:
      run:
        shell: bash
    strategy:
      fail-fast: true
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
        with:
          ref: ${{ github.event.client_payload.version }}

      - name: Set up Go 1.x
        id: go
        uses: actions/setup-go@v5
        with:
          go-version-file: go/go.mod

      - name: Setup Python 3.x
        uses: actions/setup-python@v5
        with:
          python-version: "3.10"

      # NOTE(review): this step only assigns a shell variable that is local to
      # the step; nothing is written to $GITHUB_OUTPUT, so `steps.version` has
      # no outputs. Left unchanged - confirm whether it can be removed.
      - name: Dolt version
        id: version
        run: |
          version=${{ github.event.client_payload.version }}

      - name: Install dolt
        working-directory: ./go
        run: go install ./cmd/dolt

      # NOTE(review): "[email protected]" (here and in the Report step) looks
      # like residue of e-mail obfuscation - confirm the intended address.
      - name: Config dolt
        id: config
        run: |
          dolt config --global --add user.email "[email protected]"
          dolt config --global --add user.name "merge-perf"

      # Generates data, imports it, times the merge for each scenario, and
      # writes one INSERT per scenario into results.sql.
      # Output: result_path - path of the generated results.sql.
      - name: Run bench
        id: bench
        run: |
          gw="$GITHUB_WORKSPACE"
          DATADIR="$gw/data"
          # initialize results sql import; '>' truncates so a leftover file
          # cannot end up with a second CREATE TABLE statement
          RESULTS="$gw/results.sql"
          echo "CREATE TABLE ${{ env.RESULT_TABLE_NAME }} (name varchar(50) primary key, table_cnt int, run_cnt int, add_cnt int, delete_cnt int, update_cnt int, conflict_cnt int, fks bool, latency float);" > "$RESULTS"
          # parameters for testing
          ROW_NUM=1000000
          TABLE_NUM=2
          EDIT_CNT=60000
          # parallel arrays: scenario i uses names[i], adds[i], deletes[i], updates[i]
          names=('adds_only' 'deletes_only' 'updates_only' 'adds_updates_deletes')
          adds=($EDIT_CNT 0 0 $EDIT_CNT)
          deletes=(0 $EDIT_CNT 0 $EDIT_CNT)
          updates=(0 0 $EDIT_CNT $EDIT_CNT)
          wd=$(pwd)
          for i in {0..3}; do
            # each iteration ends inside $TMPDIR, so return to the start directory
            cd "$wd"
            echo "${names[$i]}, ${adds[$i]}, ${deletes[$i]}, ${updates[$i]}"
            # data.py creates files for import
            python ${{ env.SCRIPT_DIR }}/data.py "$DATADIR" $TABLE_NUM $ROW_NUM ${adds[$i]} ${deletes[$i]} ${updates[$i]}
            # setup.sh runs the import and commit process for a set of data files
            TMPDIR="$gw/tmp"
            ./${{ env.SCRIPT_DIR }}/setup.sh "$TMPDIR" "$DATADIR"
            # small python script times merge, we suppress errcodes but print error messages
            cd "$TMPDIR"
            python3 -c "import time, subprocess, sys; start = time.time(); res=subprocess.run(['dolt', 'merge', '--squash', 'main'], capture_output=True); err = res.stdout + res.stderr if res.returncode != 0 else ''; latency = time.time() -start; print(latency); sys.stderr.write(str(err))" 1> lat.log 2>err.log
            latency=$(cat lat.log)
            cat err.log
            # count conflicts in first table
            conflicts=$(dolt sql -r csv -q "select count(*) from dolt_conflicts_table0;" | tail -1)
            echo "INSERT INTO ${{ env.RESULT_TABLE_NAME }} values ('"${names[$i]}"', $TABLE_NUM, $ROW_NUM, ${adds[$i]}, ${deletes[$i]}, ${updates[$i]}, $conflicts, true, $latency);" >> "$RESULTS"
          done
          echo "result_path=$RESULTS" >> "$GITHUB_OUTPUT"

      # Loads results.sql into a clone of the results database, commits and
      # force-pushes it, then exports a CSV report.
      # Outputs: report_path - path of results.csv; avg - average latency.
      - name: Report
        id: report
        run: |
          gw="$GITHUB_WORKSPACE"
          in="${{ steps.bench.outputs.result_path }}"
          query="select name, add_cnt, delete_cnt, update_cnt, round(latency, 2) as latency from ${{ env.RESULT_TABLE_NAME }}"
          summaryq="select round(avg(latency), 2) as avg from ${{ env.RESULT_TABLE_NAME }}"
          out="$gw/results.csv"
          dolt_dir="$gw/merge-perf"
          dolt config --global --add user.email "[email protected]"
          dolt config --global --add user.name "merge-perf"
          echo '${{ secrets.DOLTHUB_IMPORT_PERF_CREDS_VALUE }}' | dolt creds import
          dolt clone ${{ env.DOLTHUB_DB }} "$dolt_dir"
          cd "$dolt_dir"
          branch="${{ github.event.client_payload.commit_to_branch }}"
          # checkout branch, creating it when the lookup prints nothing.
          # The substitution is quoted so multi-word query output is tested as
          # one string instead of breaking `[ -z ... ]` with extra arguments.
          if [ -z "$(dolt sql -q "select 1 from dolt_branches where name = '$branch';")" ]; then
            dolt checkout -b "$branch"
          else
            dolt checkout "$branch"
          fi
          dolt sql -q "drop table if exists ${{ env.RESULT_TABLE_NAME }}"
          # load results
          dolt sql < "$in"
          # push results to dolthub
          dolt add ${{ env.RESULT_TABLE_NAME }}
          dolt commit -m "CI commit"
          dolt push -f origin "$branch"
          # generate report
          dolt sql -r csv -q "$query" > "$out"
          cat "$out"
          echo "report_path=$out" >> "$GITHUB_OUTPUT"
          avg=$(dolt sql -r csv -q "$summaryq" | tail -1)
          echo "avg=$avg" >> "$GITHUB_OUTPUT"

      # Converts the CSV report into an HTML table for the e-mail body.
      # Output: html - path of results.html.
      # The comparison must sit inside ${{ }}; written outside it, the whole
      # value is a non-empty string and the condition is always true.
      - name: Format Results
        id: html
        if: ${{ github.event.client_payload.email_recipient != '' }}
        run: |
          gw="$GITHUB_WORKSPACE"
          in="${{ steps.report.outputs.report_path }}"
          out="$gw/results.html"
          echo "<table>" > "$out"
          # first CSV line is the header row; every later line is a data row
          print_header=true
          while IFS= read -r line; do
            if "$print_header"; then
              echo " <tr><th>${line//,/</th><th>}</th></tr>" >> "$out"
              print_header=false
              continue
            fi
            echo " <tr><td>${line//,/</td><td>}</td></tr>" >> "$out"
          done < "$in"
          echo "</table>" >> "$out"
          avg="${{ steps.report.outputs.avg }}"
          echo "<table><tr><th>Average</th></tr><tr><td>$avg</td></tr></table>" >> "$out"
          cat "$out"
          echo "html=$out" >> "$GITHUB_OUTPUT"

      - name: Configure AWS Credentials
        if: ${{ github.event.client_payload.email_recipient != '' }}
        uses: aws-actions/configure-aws-credentials@v4
        with:
          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
          aws-region: us-west-2

      - name: Send Email
        uses: ./.github/actions/ses-email-action
        if: ${{ github.event.client_payload.email_recipient != '' }}
        with:
          region: us-west-2
          toAddresses: '["${{ github.event.client_payload.email_recipient }}"]'
          subject: 'Merge Performance Benchmarks: ${{ github.event.client_payload.version }}'
          bodyPath: ${{ steps.html.outputs.html }}
          template: 'SysbenchTemplate'

      # The remaining steps turn the CSV report into a markdown table and post
      # it as a comment when the payload carries an issue_id.
      - name: Read CSV
        if: ${{ github.event.client_payload.issue_id != '' }}
        id: csv
        uses: juliangruber/read-file-action@v1
        with:
          path: "${{ steps.report.outputs.report_path }}"

      - name: Create MD
        if: ${{ github.event.client_payload.issue_id != '' }}
        uses: dolthub/csv-to-md-table-action@v4
        id: md
        with:
          csvinput: ${{ steps.csv.outputs.content }}

      - uses: mshick/add-pr-comment@v2
        if: ${{ github.event.client_payload.issue_id != '' }}
        with:
          repo-token: ${{ secrets.GITHUB_TOKEN }}
          issue: ${{ github.event.client_payload.issue_id }}
          message-failure: merge benchmark failed
          message-cancelled: merge benchmark cancelled
          allow-repeats: true
          message: |
            @${{ github.event.client_payload.actor }} __DOLT__
            ${{ steps.md.outputs.markdown-table }}