benchmark-merge #647

Summary
Jobs
- Benchmark
Run details
- Usage
- Workflow file

Workflow file for this run

.github/workflows/merge-perf.yaml at 1bf5606

	# Workflow identity, trigger and shared settings.
	name: Merge Benchmarks
	# Started only by a repository_dispatch event of type `benchmark-merge`.
	on:
	  repository_dispatch:
	    types:
	      - benchmark-merge
	# Workflow-level values referenced from the step scripts via `env.*`.
	env:
	  SCRIPT_DIR: ".github/scripts/merge-perf"
	  RESULT_TABLE_NAME: "merge_perf_results"
	  DOLTHUB_DB: "import-perf/merge-perf"
	jobs:
	  # Single job; shown as "Benchmark" in the run summary.
	  bench:
	    name: Benchmark
	    runs-on: ubuntu-latest
	    # Every `run:` script in this job executes under bash.
	    defaults:
	      run:
	        shell: bash
	    strategy:
	      fail-fast: true
	    steps:
	- uses: actions/checkout@v4
	with:
	ref: ${{ github.event.client_payload.version }}

	- name: Set up Go 1.x
	id: go
	uses: actions/setup-go@v5
	with:
	go-version-file: go/go.mod

	- name: Setup Python 3.x
	uses: actions/setup-python@v5
	with:
	python-version: "3.10"

	- name: Dolt version
	id: version
	run: \|
	version=${{ github.event.client_payload.version }}

	- name: Install dolt
	working-directory: ./go
	run: go install ./cmd/dolt

	- name: Config dolt
	id: config
	run: \|
	dolt config --global --add user.email "[email protected]"
	dolt config --global --add user.name "merge-perf"

	- name: Run bench
	id: bench
	run: \|
	gw=$GITHUB_WORKSPACE
	DATADIR=$gw/data

	# initialize results sql import
	RESULTS=$gw/results.sql
	echo "CREATE TABLE ${{env.RESULT_TABLE_NAME }} (name varchar(50) primary key, table_cnt int, run_cnt int, add_cnt int, delete_cnt int, update_cnt int, conflict_cnt int, fks bool, latency float);" >> $RESULTS

	# parameters for testing
	ROW_NUM=1000000
	TABLE_NUM=2
	EDIT_CNT=60000
	names=('adds_only' 'deletes_only' 'updates_only' 'adds_updates_deletes')
	adds=($EDIT_CNT 0 0 $EDIT_CNT)
	deletes=(0 $EDIT_CNT 0 $EDIT_CNT)
	updates=(0 0 $EDIT_CNT $EDIT_CNT)

	wd=$(pwd)
	for i in {0..3}; do
	cd $wd
	echo "${names[$i]}, ${adds[$i]}, ${deletes[$i]}, ${updates[$i]}"

	# data.py creates files for import
	python ${{ env.SCRIPT_DIR }}/data.py $DATADIR $TABLE_NUM $ROW_NUM ${adds[$i]} ${deletes[$i]} ${updates[$i]}

	# setup.sh runs the import and commit process for a set of data files
	TMPDIR=$gw/tmp
	./${{ env.SCRIPT_DIR}}/setup.sh $TMPDIR $DATADIR

	# small python script times merge, we suppres errcodes but print error messages
	cd $TMPDIR
	python3 -c "import time, subprocess, sys; start = time.time(); res=subprocess.run(['dolt', 'merge', '--squash', 'main'], capture_output=True); err = res.stdout + res.stderr if res.returncode != 0 else ''; latency = time.time() -start; print(latency); sys.stderr.write(str(err))" 1> lat.log 2>err.log
	latency=$(cat lat.log)
	cat err.log

	# count conflicts in first table
	conflicts=$(dolt sql -r csv -q "select count(*) from dolt_conflicts_table0;" \| tail -1)

	echo "INSERT INTO ${{ env.RESULT_TABLE_NAME }} values ('"${names[$i]}"', $TABLE_NUM, $ROW_NUM, ${adds[$i]}, ${deletes[$i]}, ${updates[$i]}, $conflicts, true, $latency);" >> $RESULTS
	done
	echo "result_path=$RESULTS" >> $GITHUB_OUTPUT

	- name: Report
	id: report
	run: \|
	gw=$GITHUB_WORKSPACE
	in="${{ steps.bench.outputs.result_path }}"
	query="select name, add_cnt, delete_cnt, update_cnt, round(latency, 2) as latency from ${{ env.RESULT_TABLE_NAME }}"
	summaryq="select round(avg(latency), 2) as avg from ${{ env.RESULT_TABLE_NAME }}"

	out="$gw/results.csv"
	dolt_dir="$gw/merge-perf"

	dolt config --global --add user.email "[email protected]"
	dolt config --global --add user.name "merge-perf"

	echo '${{ secrets.DOLTHUB_IMPORT_PERF_CREDS_VALUE }}' \| dolt creds import
	dolt clone ${{ env.DOLTHUB_DB }} "$dolt_dir"

	cd "$dolt_dir"

	branch="${{ github.event.client_payload.commit_to_branch }}"
	# checkout branch
	if [ -z $(dolt sql -q "select 1 from dolt_branches where name = '$branch';") ]; then
	dolt checkout -b $branch
	else
	dolt checkout $branch
	fi

	dolt sql -q "drop table if exists ${{ env.RESULT_TABLE_NAME }}"

	# load results
	dolt sql < "$in"

	# push results to dolthub
	dolt add ${{ env.RESULT_TABLE_NAME }}
	dolt commit -m "CI commit"
	dolt push -f origin $branch

	# generate report
	dolt sql -r csv -q "$query" > "$out"

	cat "$out"
	echo "report_path=$out" >> $GITHUB_OUTPUT

	avg=$(dolt sql -r csv -q "$summaryq" \| tail -1)
	echo "avg=$avg" >> $GITHUB_OUTPUT

	- name: Format Results
	id: html
	if: ${{ github.event.client_payload.email_recipient }} != ""
	run: \|
	gw="$GITHUB_WORKSPACE"
	in="${{ steps.report.outputs.report_path }}"
	out="$gw/results.html"

	echo "<table>" > "$out"
	print_header=true
	while read line; do
	if "$print_header"; then
	echo " <tr><th>${line//,/</th><th>}</th></tr>" >> "$out"
	print_header=false
	continue
	fi
	echo " <tr><td>${line//,/</td><td>}</td></tr>" >> "$out"
	done < "$in"
	echo "</table>" >> "$out"

	avg="${{ steps.report.outputs.avg }}"
	echo "<table><tr><th>Average</th></tr><tr><td>$avg</tr></td></table>" >> "$out"

	cat "$out"
	echo "html=$(echo $out)" >> $GITHUB_OUTPUT

	- name: Configure AWS Credentials
	if: ${{ github.event.client_payload.email_recipient }} != ""
	uses: aws-actions/configure-aws-credentials@v4
	with:
	aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
	aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
	aws-region: us-west-2

	- name: Send Email
	uses: ./.github/actions/ses-email-action
	if: ${{ github.event.client_payload.email_recipient }} != ""
	with:
	region: us-west-2
	toAddresses: '["${{ github.event.client_payload.email_recipient }}"]'
	subject: 'Merge Performance Benchmarks: ${{ github.event.client_payload.version }}'
	bodyPath: ${{ steps.html.outputs.html }}
	template: 'SysbenchTemplate'

	- name: Read CSV
	if: ${{ github.event.client_payload.issue_id }} != ""
	id: csv
	uses: juliangruber/read-file-action@v1
	with:
	path: "${{ steps.report.outputs.report_path }}"

	- name: Create MD
	if: ${{ github.event.client_payload.issue_id }} != ""
	uses: dolthub/csv-to-md-table-action@v4
	id: md
	with:
	csvinput: ${{ steps.csv.outputs.content }}

	- uses: mshick/add-pr-comment@v2
	if: ${{ github.event.client_payload.issue_id }} != ""
	with:
	repo-token: ${{ secrets.GITHUB_TOKEN }}
	issue: ${{ github.event.client_payload.issue_id }}
	message-failure: merge benchmark failed
	message-cancelled: merge benchmark cancelled
	allow-repeats: true
	message: \|
	@${{ github.event.client_payload.actor }} __DOLT__
	${{ steps.md.outputs.markdown-table }}

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

benchmark-merge #647

Workflow file

benchmark-merge #647

Jobs

Run details

Workflow file for this run