Skip to content

Commit

Permalink
Add backfill benchmarks (#412)
Browse files Browse the repository at this point in the history
This change adds benchmarks that run against 10k, 100k and 1 million
rows.

They benchmark:

* How long it takes to complete a full backfill of a single column
* How long it takes to update all rows in a table with and without a
migration trigger in place

This should give us a baseline metric that we can use to compare
performance over time.

Example output:

```
make bench 
go test ./internal/benchmarks -v -benchtime=1x -bench .
2024/10/21 12:44:01 github.com/testcontainers/testcontainers-go - Connected to docker: 
  Server Version: 27.2.0
  API Version: 1.46
  Operating System: Docker Desktop
  Total Memory: 7838 MB
  Labels:
    com.docker.desktop.address=unix:///Users/ryan/Library/Containers/com.docker.docker/Data/docker-cli.sock
  Testcontainers for Go Version: v0.33.0
  Resolved Docker Host: unix:///var/run/docker.sock
  Resolved Docker Socket Path: /var/run/docker.sock
  Test SessionID: 816adaef777204b01d23a061c6f5532ca8cea098c7f8c6a68fdf542fbfa73f6e
  Test ProcessID: bf2f6095-b21e-4569-a4df-52291606bf3d
2024/10/21 12:44:01 🐳 Creating container for image testcontainers/ryuk:0.8.1
2024/10/21 12:44:01 ✅ Container created: eab8b6af62ba
2024/10/21 12:44:01 🐳 Starting container: eab8b6af62ba
2024/10/21 12:44:01 ✅ Container started: eab8b6af62ba
2024/10/21 12:44:01 ⏳ Waiting for container id eab8b6af62ba image: testcontainers/ryuk:0.8.1. Waiting for: &{Port:8080/tcp timeout:<nil> PollInterval:100ms skipInternalCheck:false}
2024/10/21 12:44:01 🔔 Container is ready: eab8b6af62ba
2024/10/21 12:44:01 🐳 Creating container for image postgres:15.3
2024/10/21 12:44:01 ✅ Container created: 7bc6dfd7af00
2024/10/21 12:44:01 🐳 Starting container: 7bc6dfd7af00
2024/10/21 12:44:01 ✅ Container started: 7bc6dfd7af00
2024/10/21 12:44:01 ⏳ Waiting for container id 7bc6dfd7af00 image: postgres:15.3. Waiting for: &{timeout:<nil> deadline:0x14000435060 Strategies:[0x14000460540]}
2024/10/21 12:44:02 🔔 Container is ready: 7bc6dfd7af00
goos: darwin
goarch: arm64
pkg: github.com/xataio/pgroll/internal/benchmarks
cpu: Apple M2 Pro
BenchmarkBackfill
BenchmarkBackfill/10000
    benchmarks_test.go:136: Seeded 10000 rows in 19.073458ms (524289 rows/s)
    benchmarks_test.go:51: Backfilled 10000 rows in 102.083958ms
BenchmarkBackfill/10000-10        	      1	102083958 ns/op	    97959 rows/s
BenchmarkBackfill/100000
    benchmarks_test.go:136: Seeded 100000 rows in 96.639042ms (1034778 rows/s)
    benchmarks_test.go:51: Backfilled 100000 rows in 2.032871959s
BenchmarkBackfill/100000-10       	      1	2032871959 ns/op	    49191 rows/s
BenchmarkBackfill/1000000
    benchmarks_test.go:136: Seeded 1000000 rows in 608.590708ms (1643140 rows/s)
    benchmarks_test.go:51: Backfilled 1000000 rows in 56.80506s
BenchmarkBackfill/1000000-10      	      1	56805060000 ns/op	    17604 rows/s
BenchmarkWriteAmplification
BenchmarkWriteAmplification/NoTrigger
BenchmarkWriteAmplification/NoTrigger/10000
    benchmarks_test.go:136: Seeded 10000 rows in 21.901875ms (456582 rows/s)
BenchmarkWriteAmplification/NoTrigger/10000-10   	      1	 15013333 ns/op	   666075 rows/s
BenchmarkWriteAmplification/NoTrigger/100000
    benchmarks_test.go:136: Seeded 100000 rows in 98.442458ms (1015822 rows/s)
BenchmarkWriteAmplification/NoTrigger/100000-10  	      1	155141667 ns/op	   644572 rows/s
BenchmarkWriteAmplification/NoTrigger/1000000
    benchmarks_test.go:136: Seeded 1000000 rows in 663.248542ms (1507730 rows/s)
BenchmarkWriteAmplification/NoTrigger/1000000-10 	      1	1704721875 ns/op	   586606 rows/s
BenchmarkWriteAmplification/WithTrigger
BenchmarkWriteAmplification/WithTrigger/10000
    benchmarks_test.go:136: Seeded 10000 rows in 26.146708ms (382457 rows/s)
BenchmarkWriteAmplification/WithTrigger/10000-10 	      1	 59703417 ns/op	   167495 rows/s
BenchmarkWriteAmplification/WithTrigger/100000
    benchmarks_test.go:136: Seeded 100000 rows in 102.552667ms (975109 rows/s)
BenchmarkWriteAmplification/WithTrigger/100000-10         	      1	630408666 ns/op	   158627 rows/s
BenchmarkWriteAmplification/WithTrigger/1000000
    benchmarks_test.go:136: Seeded 1000000 rows in 666.005167ms (1501490 rows/s)
BenchmarkWriteAmplification/WithTrigger/1000000-10        	      1	5909246000 ns/op	   169226 rows/s
PASS
2024/10/21 12:45:51 🐳 Terminating container: 7bc6dfd7af00
2024/10/21 12:45:51 🚫 Container terminated: 7bc6dfd7af00
ok  	github.com/xataio/pgroll/internal/benchmarks	110.632s
```

Part of #408
  • Loading branch information
ryanslade authored Oct 29, 2024
1 parent e2f1740 commit b6f76c7
Show file tree
Hide file tree
Showing 5 changed files with 226 additions and 3 deletions.
28 changes: 28 additions & 0 deletions .github/workflows/benchmark.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
name: Benchmark
on:
push:
branches:
- main
permissions:
contents: read
packages: read
jobs:
benchmark:
name: 'benchmark (pg: ${{ matrix.pgVersion }})'
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
pgVersion: ['14.8', '15.3', '16.4', '17.0' ,'latest']
steps:
- uses: actions/checkout@v4

- name: Set up Go
uses: actions/setup-go@v5
with:
go-version-file: 'go.mod'

- name: Run benchmarks
run: make bench
env:
POSTGRES_VERSION: ${{ matrix.pgVersion }}
2 changes: 1 addition & 1 deletion .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -204,7 +204,7 @@ jobs:
version: latest
args: release --clean
env:
# We use two github tokens here:
# We use two GitHub tokens here:
# * The actions-bound `GITHUB_TOKEN` with permissions to write packages.
# * The org level `GIT_TOKEN` to be able to publish the brew tap file.
# See: https://goreleaser.com/errors/resource-not-accessible-by-integration/
Expand Down
4 changes: 4 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ clean:
format:
# Format JSON schema
docker run --rm -v $$PWD/schema.json:/mnt/schema.json node:alpine npx prettier /mnt/schema.json --parser json --tab-width 2 --single-quote --trailing-comma all --no-semi --arrow-parens always --print-width 120 --write
# Format embedded SQL
docker run --rm -v $$PWD/pkg/state/init.sql:/mnt/init.sql node:alpine npx sql-formatter -l postgresql -o /mnt/init.sql /mnt/init.sql

generate: format
Expand All @@ -32,3 +33,6 @@ examples:

test:
go test ./...

bench:
go test ./internal/benchmarks -v -benchtime=1x -bench .
191 changes: 191 additions & 0 deletions internal/benchmarks/benchmarks_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,191 @@
// SPDX-License-Identifier: Apache-2.0

package benchmarks

import (
"context"
"database/sql"
"strconv"
"testing"

"github.com/lib/pq"
"github.com/oapi-codegen/nullable"
"github.com/stretchr/testify/require"

"github.com/xataio/pgroll/internal/testutils"
"github.com/xataio/pgroll/pkg/migrations"
"github.com/xataio/pgroll/pkg/roll"
)

// unitRowsPerSecond is the unit label passed to b.ReportMetric for the
// throughput figure reported by each benchmark.
const unitRowsPerSecond = "rows/s"

// rowCounts lists the table sizes, in rows, that every benchmark is run against.
var rowCounts = []int{10_000, 100_000, 300_000}

// TestMain hands control of this package's test run to testutils.SharedTestMain.
// NOTE(review): presumably SharedTestMain provisions the shared Postgres
// container used by the benchmarks — confirm in internal/testutils.
func TestMain(m *testing.M) {
testutils.SharedTestMain(m)
}

// BenchmarkBackfill times a full start-and-complete cycle of migAlterColumn
// against a freshly seeded `users` table, once for each size in rowCounts.
// Table creation and seeding are excluded from the measurement. Besides the
// default ns/op, each run logs the elapsed time and reports a rows/s metric.
func BenchmarkBackfill(b *testing.B) {
	var (
		ctx    = context.Background()
		schema = testutils.TestSchema()
		opts   []roll.Option
	)

	for _, n := range rowCounts {
		b.Run(strconv.Itoa(n), func(b *testing.B) {
			run := func(mig *roll.Roll, db *sql.DB) {
				b.Cleanup(func() {
					require.NoError(b, mig.Close())
				})

				// Untimed preparation: create the table and seed n rows.
				setupInitialTable(b, ctx, schema, mig, db, n)
				b.ResetTimer()

				// Timed section: the migration start performs the backfill.
				b.StartTimer()
				require.NoError(b, mig.Start(ctx, &migAlterColumn))
				require.NoError(b, mig.Complete(ctx))
				b.StopTimer()

				// The timer is stopped, so Elapsed is stable from here on.
				elapsed := b.Elapsed()
				b.Logf("Backfilled %d rows in %s", n, elapsed)
				b.ReportMetric(float64(n)/elapsed.Seconds(), unitRowsPerSecond)
			}

			testutils.WithMigratorInSchemaAndConnectionToContainerWithOptions(b, schema, opts, run)
		})
	}
}

// BenchmarkWriteAmplification benchmarks the difference between updating all
// rows with and without an update trigger in place. For every size in
// rowCounts it runs two sub-benchmarks:
//
//   - NoTrigger: the `users` table is only created and seeded.
//   - WithTrigger: the migAlterColumn migration has been started, and is not
//     completed until cleanup, so the update runs mid-migration.
//
// Each sub-benchmark reports a rows/s metric and, during cleanup, checks that
// the number of rows with name = 'person' equals the number of seeded rows.
func BenchmarkWriteAmplification(b *testing.B) {
	ctx := context.Background()
	testSchema := testutils.TestSchema()
	var opts []roll.Option

	// assertRowCount fails tb unless exactly rowCount rows carry the updated
	// name. Failures must be reported on tb (the sub-benchmark invoking the
	// helper) and not on the enclosing parent benchmark: FailNow has to be
	// called from the goroutine running the benchmark it is called on.
	assertRowCount := func(tb testing.TB, db *sql.DB, rowCount int) {
		tb.Helper()

		var count int
		err := db.QueryRowContext(ctx, "SELECT COUNT(*) FROM users WHERE name = 'person'").Scan(&count)
		require.NoError(tb, err)
		require.Equal(tb, rowCount, count)
	}

	// updateAllRows times a single UPDATE that rewrites the name of every row
	// and reports the resulting throughput on b. Anything that ran on b before
	// this call (table setup, migration start) is discarded by ResetTimer.
	updateAllRows := func(b *testing.B, db *sql.DB, rowCount int) {
		b.ResetTimer()

		// Update the name in all rows
		b.StartTimer()
		_, err := db.ExecContext(ctx, `UPDATE users SET name = 'person'`)
		require.NoError(b, err)
		b.StopTimer()

		rowsPerSecond := float64(rowCount) / b.Elapsed().Seconds()
		b.ReportMetric(rowsPerSecond, unitRowsPerSecond)
	}

	b.Run("NoTrigger", func(b *testing.B) {
		for _, rowCount := range rowCounts {
			b.Run(strconv.Itoa(rowCount), func(b *testing.B) {
				testutils.WithMigratorInSchemaAndConnectionToContainerWithOptions(b, testSchema, opts, func(mig *roll.Roll, db *sql.DB) {
					setupInitialTable(b, ctx, testSchema, mig, db, rowCount)
					b.Cleanup(func() {
						require.NoError(b, mig.Close())
						assertRowCount(b, db, rowCount)
					})

					updateAllRows(b, db, rowCount)
				})
			})
		}
	})

	b.Run("WithTrigger", func(b *testing.B) {
		for _, rowCount := range rowCounts {
			b.Run(strconv.Itoa(rowCount), func(b *testing.B) {
				testutils.WithMigratorInSchemaAndConnectionToContainerWithOptions(b, testSchema, opts, func(mig *roll.Roll, db *sql.DB) {
					setupInitialTable(b, ctx, testSchema, mig, db, rowCount)

					// Start the migration so the update below runs while it is in progress.
					require.NoError(b, mig.Start(ctx, &migAlterColumn))
					b.Cleanup(func() {
						// Finish the migration
						require.NoError(b, mig.Complete(ctx))
						require.NoError(b, mig.Close())
						assertRowCount(b, db, rowCount)
					})

					updateAllRows(b, db, rowCount)
				})
			})
		}
	})
}

// setupInitialTable prepares the table that the benchmarks operate on. It
// starts and completes migCreateTable through mig, then bulk-loads rowCount
// rows into the `users` table of testSchema over db, passing nil for the
// `name` column of every row.
//
// Any failure aborts the calling test or benchmark via tb.
func setupInitialTable(tb testing.TB, ctx context.Context, testSchema string, mig *roll.Roll, db *sql.DB, rowCount int) {
	tb.Helper()

	seed := func(tb testing.TB, rowCount int, db *sql.DB) {
		tx, err := db.BeginTx(ctx, nil)
		require.NoError(tb, err)
		// Safety net for early exits: after a successful Commit this only
		// returns sql.ErrTxDone, which is deliberately ignored.
		defer tx.Rollback() //nolint:errcheck

		stmt, err := tx.PrepareContext(ctx, pq.CopyInSchema(testSchema, "users", "name"))
		require.NoError(tb, err)

		// Each Exec with arguments buffers one row for the COPY.
		for i := 0; i < rowCount; i++ {
			_, err = stmt.ExecContext(ctx, nil)
			require.NoError(tb, err)
		}

		// An Exec without arguments flushes the buffered rows.
		_, err = stmt.ExecContext(ctx)
		require.NoError(tb, err)

		// Close the COPY statement explicitly before committing so that any
		// error it reports is surfaced instead of being dropped when the
		// transaction tears the statement down.
		require.NoError(tb, stmt.Close())
		require.NoError(tb, tx.Commit())
	}

	// Setup
	require.NoError(tb, mig.Start(ctx, &migCreateTable))
	require.NoError(tb, mig.Complete(ctx))
	seed(tb, rowCount, db)
}

// migCreateTable creates the `users` table used by all benchmarks: a simple
// table with a serial primary key `id` and a nullable, non-unique
// varchar(255) `name` field.
var migCreateTable = migrations.Migration{
Name: "01_create_table",
Operations: migrations.Operations{
&migrations.OpCreateTable{
Name: "users",
Columns: []migrations.Column{
{
Name: "id",
Type: "serial",
Pk: ptr(true),
},
{
Name: "name",
Type: "varchar(255)",
Nullable: ptr(true),
Unique: ptr(false),
},
},
},
},
}

// migAlterColumn alters the `users` table to make the `name` field not null.
// Its Up expression backfills old NULL names with `placeholder` and keeps
// every other name unchanged.
//
// NOTE(review): Down is `user_name`, but the table created by migCreateTable
// has no column of that name (only `id` and `name`) — confirm this expression
// is intended and is never evaluated by these benchmarks.
var migAlterColumn = migrations.Migration{
Name: "02_alter_column",
Operations: migrations.Operations{
&migrations.OpAlterColumn{
Table: "users",
Column: "name",
Up: "(SELECT CASE WHEN name IS NULL THEN 'placeholder' ELSE name END)",
Down: "user_name",
Comment: nullable.NewNullableWithValue("the name of the user"),
Nullable: ptr(false),
},
},
}

// ptr returns a pointer to a fresh copy of x. It exists so that literals can
// be used where a pointer field is required, e.g. ptr(true).
func ptr[T any](x T) *T {
	p := new(T)
	*p = x
	return p
}
4 changes: 2 additions & 2 deletions internal/testutils/util.go
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ func WithUninitializedState(t *testing.T, fn func(*state.State)) {
fn(st)
}

func WithMigratorInSchemaAndConnectionToContainerWithOptions(t *testing.T, schema string, opts []roll.Option, fn func(mig *roll.Roll, db *sql.DB)) {
func WithMigratorInSchemaAndConnectionToContainerWithOptions(t testing.TB, schema string, opts []roll.Option, fn func(mig *roll.Roll, db *sql.DB)) {
t.Helper()
ctx := context.Background()

Expand Down Expand Up @@ -236,7 +236,7 @@ func WithMigratorAndConnectionToContainerWithOptions(t *testing.T, opts []roll.O
// - a connection to the new database
// - the connection string to the new database
// - the name of the new database
func setupTestDatabase(t *testing.T) (*sql.DB, string, string) {
func setupTestDatabase(t testing.TB) (*sql.DB, string, string) {
t.Helper()
ctx := context.Background()

Expand Down

0 comments on commit b6f76c7

Please sign in to comment.