Skip to content

Commit

Permalink
Add dbutil for building mass insert queries (#10)
Browse files Browse the repository at this point in the history
  • Loading branch information
tulir authored Mar 17, 2024
1 parent b7b9dd6 commit e3ab497
Show file tree
Hide file tree
Showing 3 changed files with 322 additions and 0 deletions.
162 changes: 162 additions & 0 deletions dbutil/massinsert.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
// Copyright (c) 2024 Tulir Asokan
//
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at http://mozilla.org/MPL/2.0/.

package dbutil

import (
"fmt"
"regexp"
"strings"
)

// Array is a type constraint that matches fixed-size arrays of `any` with
// 1 to 20 elements.
//
// Every supported length has to be listed explicitly, because Go generics
// can't be parameterized over array sizes: https://github.com/golang/go/issues/44253
type Array interface {
	[1]any | [2]any | [3]any | [4]any | [5]any |
		[6]any | [7]any | [8]any | [9]any | [10]any |
		[11]any | [12]any | [13]any | [14]any | [15]any |
		[16]any | [17]any | [18]any | [19]any | [20]any
}

// MassInsertable is implemented by items that can be written with a mass
// insert query. The returned array holds the item's dynamic (per-row) values.
type MassInsertable[Values Array] interface {
	// GetMassInsertValues returns the dynamic values of this item.
	GetMassInsertValues() Values
}

// MassInsertBuilder holds the two fmt templates that are needed to build mass
// insert SQL queries. Instances are meant to be created with
// NewMassInsertBuilder, which validates the templates.
type MassInsertBuilder[Item MassInsertable[DynamicParams], StaticParams, DynamicParams Array] struct {
	// queryTemplate is the insert query with the values tuple replaced by a
	// single %s directive.
	queryTemplate string
	// placeholderTemplate is the fmt template for the values tuple of one row.
	placeholderTemplate string
}

// NewMassInsertBuilder creates a new MassInsertBuilder that can build mass insert database queries.
//
// Parameters in mass insert queries are split into two types: static parameters
// and dynamic parameters. Static parameters are the same for all items being
// inserted, while dynamic parameters are different for each item.
//
// The given query should be a normal INSERT query for a single row. It can also
// have ON CONFLICT clauses, as long as the clause uses `excluded` instead of
// positional parameters. The values tuple must list every parameter in order,
// i.e. `($1, $2, ..., $N)` where N is the number of static parameters plus the
// number of dynamic parameters. Literal `%` characters elsewhere in the query
// are escaped automatically, so they must not be pre-escaped as `%%`.
//
// The placeholder template is used to replace the `VALUES` part of the given
// query. It should contain a positional placeholder ($1, $2, ...) for each
// static placeholder, and a fmt directive (`$%d`) for each dynamic placeholder.
//
// The given query and placeholder template are validated here and the function
// will panic if they're invalid (e.g. if the `VALUES` part of the insert query
// can't be found, or if the placeholder template doesn't have the right things).
// The idea is to use this function to populate a global variable with the mass
// insert builder, so the panic will happen at startup if the query or
// placeholder template are invalid (instead of returning an error when trying
// to use the query later).
//
// Example:
//
//	type Message struct {
//		ChatID    int
//		RemoteID  string
//		MXID      id.EventID
//		Timestamp time.Time
//	}
//
//	func (msg *Message) GetMassInsertValues() [3]any {
//		return [3]any{msg.RemoteID, msg.MXID, msg.Timestamp.UnixMilli()}
//	}
//
//	const insertMessageQuery = `INSERT INTO message (chat_id, remote_id, mxid, timestamp) VALUES ($1, $2, $3, $4)`
//	var massInsertMessageBuilder = dbutil.NewMassInsertBuilder[*Message, [1]any](insertMessageQuery, "($1, $%d, $%d, $%d)")
//
//	func DoMassInsert(ctx context.Context, messages []*Message) error {
//		query, params := massInsertMessageBuilder.Build([1]any{messages[0].ChatID}, messages)
//		_, err := db.Exec(ctx, query, params...)
//		return err
//	}
func NewMassInsertBuilder[Item MassInsertable[DynamicParams], StaticParams Array, DynamicParams Array](
	singleInsertQuery, placeholderTemplate string,
) *MassInsertBuilder[Item, StaticParams, DynamicParams] {
	var dyn DynamicParams
	var stat StaticParams
	staticCount, dynamicCount := len(stat), len(dyn)

	// Locate the single-row values tuple "($1, $2, ..., $N)" in the query.
	tupleParts := make([]string, staticCount+dynamicCount)
	for i := range tupleParts {
		tupleParts[i] = fmt.Sprintf(`\$%d`, i+1)
	}
	tupleRegex := regexp.MustCompile(fmt.Sprintf(`\(\s*%s\s*\)`, strings.Join(tupleParts, `\s*,\s*`)))
	tuples := tupleRegex.FindAllStringIndex(singleInsertQuery, -1)
	if len(tuples) == 0 {
		panic(fmt.Errorf("invalid insert query: placeholders not found"))
	} else if len(tuples) > 1 {
		panic(fmt.Errorf("invalid insert query: multiple placeholders found"))
	}

	// Check the literal placeholders in the template as whole tokens, so that
	// e.g. $10 is never mistaken for $1. Only $1..$staticCount may be hardcoded.
	staticTokens := make(map[string]bool, staticCount)
	for i := 1; i <= staticCount; i++ {
		staticTokens[fmt.Sprintf("$%d", i)] = true
	}
	templateTokens := massInsertPlaceholderRegex.FindAllString(placeholderTemplate, -1)
	templateHas := make(map[string]bool, len(templateTokens))
	for _, token := range templateTokens {
		templateHas[token] = true
	}
	for i := 1; i <= staticCount; i++ {
		if !templateHas[fmt.Sprintf("$%d", i)] {
			panic(fmt.Errorf("invalid placeholder template: static placeholder $%d not found", i))
		}
	}
	for _, token := range templateTokens {
		if !staticTokens[token] {
			panic(fmt.Errorf("invalid placeholder template: non-static placeholder %s found", token))
		}
	}

	// Render the template exactly like Build does for the first row. Any fmt
	// error marker in the output means the directives don't line up with the
	// dynamic parameters (too few, too many, or an incompatible verb).
	indexes := make([]any, dynamicCount)
	for i := range indexes {
		indexes[i] = staticCount + i + 1
	}
	rendered := fmt.Sprintf(placeholderTemplate, indexes...)
	if strings.Contains(rendered, "%!(EXTRA ") {
		panic(fmt.Errorf("invalid placeholder template: extra string found"))
	} else if strings.Contains(rendered, "%!") {
		panic(fmt.Errorf("invalid placeholder template: fmt directives don't match the %d dynamic parameters", dynamicCount))
	}
	renderedHas := make(map[string]bool, staticCount+dynamicCount)
	for _, token := range massInsertPlaceholderRegex.FindAllString(rendered, -1) {
		renderedHas[token] = true
	}
	for i := staticCount + 1; i <= staticCount+dynamicCount; i++ {
		if !renderedHas[fmt.Sprintf("$%d", i)] {
			panic(fmt.Errorf("invalid placeholder template: dynamic placeholder $%d not found", i))
		}
	}

	// The query is used as a fmt template later, so every literal % outside
	// the values tuple has to be escaped to survive fmt.Sprintf unchanged.
	tupleStart, tupleEnd := tuples[0][0], tuples[0][1]
	queryTemplate := strings.ReplaceAll(singleInsertQuery[:tupleStart], "%", "%%") +
		"%s" +
		strings.ReplaceAll(singleInsertQuery[tupleEnd:], "%", "%%")
	return &MassInsertBuilder[Item, StaticParams, DynamicParams]{
		queryTemplate:       queryTemplate,
		placeholderTemplate: placeholderTemplate,
	}
}

// massInsertPlaceholderRegex matches positional SQL placeholders like $1 or $23.
var massInsertPlaceholderRegex = regexp.MustCompile(`\$\d+`)

// Build renders a single mass insert SQL query and its parameter list from
// the templates stored in this builder.
//
// The returned params start with the static values, followed by the values
// returned by GetMassInsertValues for each item in order. Every row in the
// query reuses the static placeholders and gets its own sequentially numbered
// dynamic placeholders.
//
// Exactly one query is produced regardless of the number of items, so large
// inputs may need to be chunked beforehand to stay under the database's
// parameter limit. For example, SQLite (3.32+) has a limit of 32766 parameters
// by default, while Postgres allows up to 65535. The maximum number of items
// per query is the parameter limit minus the number of static columns,
// divided by the number of dynamic columns.
//
// Example of chunking input data:
//
//	var mib dbutil.MassInsertBuilder
//	var db *dbutil.Database
//	func MassInsert(ctx context.Context, ..., data []T) error {
//		return db.DoTxn(ctx, nil, func(ctx context.Context) error {
//			for _, chunk := range exslices.Chunk(data, 100) {
//				query, params := mib.Build(staticParams, chunk)
//				_, err := db.Exec(ctx, query, params...)
//				if err != nil {
//					return err
//				}
//			}
//			return nil
//		}
//	}
func (mib *MassInsertBuilder[Item, StaticParams, DynamicParams]) Build(static StaticParams, data []Item) (query string, params []any) {
	var row DynamicParams
	staticCount, dynamicCount := len(static), len(row)

	params = make([]any, staticCount+dynamicCount*len(data))
	for idx := 0; idx < staticCount; idx++ {
		params[idx] = static[idx]
	}

	rows := make([]string, 0, len(data))
	// indexes is reused for every row to hold the 1-based parameter numbers
	// that are substituted into the placeholder template.
	indexes := make([]any, dynamicCount)
	offset := staticCount
	for _, item := range data {
		row = item.GetMassInsertValues()
		for col := 0; col < dynamicCount; col++ {
			params[offset+col] = row[col]
			indexes[col] = offset + col + 1
		}
		rows = append(rows, fmt.Sprintf(mib.placeholderTemplate, indexes...))
		offset += dynamicCount
	}
	query = fmt.Sprintf(mib.queryTemplate, strings.Join(rows, ", "))
	return query, params
}
132 changes: 132 additions & 0 deletions dbutil/massinsert_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
// Copyright (c) 2024 Tulir Asokan
//
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at http://mozilla.org/MPL/2.0/.

package dbutil_test

import (
"fmt"
"math/rand"
"strings"
"testing"
"time"

"github.com/stretchr/testify/assert"

"go.mau.fi/util/dbutil"
"go.mau.fi/util/random"
)

// AbstractMassInsertable is a generic test item that stores its dynamic mass
// insert values directly in the Data field.
type AbstractMassInsertable[Values dbutil.Array] struct {
	Data Values
}

// GetMassInsertValues returns the stored Data array as-is.
func (item AbstractMassInsertable[Values]) GetMassInsertValues() Values {
	return item.Data
}

// OneParamMassInsertable is a test item with exactly one dynamic value.
type OneParamMassInsertable = AbstractMassInsertable[[1]any]

// TestNewMassInsertBuilder_InvalidParams checks that NewMassInsertBuilder
// panics with the expected error for invalid queries and placeholder templates.
func TestNewMassInsertBuilder_InvalidParams(t *testing.T) {
	const validQuery = "INSERT INTO foo VALUES ($1, $2)"
	cases := []struct {
		wantErr  string
		query    string
		template string
	}{
		{"invalid insert query: placeholders not found", "", ""},
		{"invalid placeholder template: static placeholder $1 not found", validQuery, ""},
		{"invalid placeholder template: non-static placeholder $2 found", validQuery, "($1, $2)"},
		{"invalid placeholder template: extra string found", validQuery, "($1)"},
	}
	for _, tc := range cases {
		assert.PanicsWithError(t, tc.wantErr, func() {
			dbutil.NewMassInsertBuilder[OneParamMassInsertable, [1]any](tc.query, tc.template)
		})
	}
}

// TestMassInsertBuilder_Build checks the generated query and parameter list
// for one static and one dynamic parameter across three items.
func TestMassInsertBuilder_Build(t *testing.T) {
	const (
		insertQuery = "INSERT INTO foo VALUES ($1, $2)"
		rowTemplate = "($1, $%d)"
	)
	builder := dbutil.NewMassInsertBuilder[OneParamMassInsertable, [1]any](insertQuery, rowTemplate)
	items := []OneParamMassInsertable{
		{Data: [1]any{"hmm"}},
		{Data: [1]any{"meow"}},
		{Data: [1]any{"third"}},
	}
	gotQuery, gotParams := builder.Build([1]any{"hi"}, items)
	assert.Equal(t, "INSERT INTO foo VALUES ($1, $2), ($1, $3), ($1, $4)", gotQuery)
	assert.Equal(t, []any{"hi", "hmm", "meow", "third"}, gotParams)
}

func TestMassInsertBuilder_Build_MultiValue(t *testing.T) {
ts := time.Now().UnixMilli()
builder := dbutil.NewMassInsertBuilder[AbstractMassInsertable[[5]any], [3]any]("INSERT INTO foo VALUES ($1, $2, $3, $4, $5, $6, $7, $8)", "($1, $2, $%d, $%d, $3, $%d, $%d, $%d)")
query, values := builder.Build([3]any{"first", "second", 3}, []AbstractMassInsertable[[5]any]{
{[5]any{"foo1", 123, true, "meow", ts}},
{[5]any{"foo2", 666, false, "meow", ts + 1}},
{[5]any{"foo3", 999, true, "no meow", ts + 2}},
{[5]any{"foo4", 0, true, "meow!", 0}},
})
assert.Equal(t, "INSERT INTO foo VALUES ($1, $2, $4, $5, $3, $6, $7, $8), ($1, $2, $9, $10, $3, $11, $12, $13), ($1, $2, $14, $15, $3, $16, $17, $18), ($1, $2, $19, $20, $3, $21, $22, $23)", query)
assert.Equal(t, []any{"first", "second", 3, "foo1", 123, true, "meow", ts, "foo2", 666, false, "meow", ts + 1, "foo3", 999, true, "no meow", ts + 2, "foo4", 0, true, "meow!", 0}, values)
}

func TestMassInsertBuilder_Build_CompareWithManual(t *testing.T) {
builder := dbutil.NewMassInsertBuilder[AbstractMassInsertable[[5]any], [3]any]("INSERT INTO foo VALUES ($1, $2, $3, $4, $5, $6, $7, $8)", "($1, $2, $%d, $%d, $3, $%d, $%d, $%d)")
data := makeBenchmarkData[[5]any](100)
manualQuery, manualParams := buildMassInsertManual(data)
query, params := builder.Build([3]any{"first", "second", 3}, data)
assert.Equal(t, manualQuery, query)
assert.Equal(t, manualParams, params)
}

// makeBenchmarkData generates n items for tests and benchmarks.
//
// One random value (int, bool, millisecond timestamp or 16-character string)
// is picked per column, and that value is shared by every item, so all
// returned items hold identical data. With n == 0 an empty slice is returned.
func makeBenchmarkData[T dbutil.Array](n int) []AbstractMassInsertable[T] {
	outArr := make([]AbstractMassInsertable[T], n)
	// Take the column count from a zero value instead of outArr[0], which
	// would panic when n is 0.
	var zero T
	dataLen := len(zero)
	for i := 0; i < dataLen; i++ {
		var val any
		switch rand.Intn(4) {
		case 0:
			val = rand.Intn(1000)
		case 1:
			// Intn(2) returns 0 or 1, so both true and false can be generated.
			val = rand.Intn(2) == 0
		case 2:
			val = time.Now().UnixMilli()
		case 3:
			val = random.String(16)
		}
		for j := range outArr {
			outArr[j].Data[i] = val
		}
	}
	return outArr
}

// BenchmarkMassInsertBuilder_Build5x100 measures MassInsertBuilder.Build with
// three static parameters and 100 items of five dynamic values each.
func BenchmarkMassInsertBuilder_Build5x100(b *testing.B) {
	builder := dbutil.NewMassInsertBuilder[AbstractMassInsertable[[5]any], [3]any]("INSERT INTO foo VALUES ($1, $2, $3, $4, $5, $6, $7, $8)", "($1, $2, $%d, $%d, $3, $%d, $%d, $%d)")
	data := makeBenchmarkData[[5]any](100)
	static := [3]any{"first", "second", 3}
	// Keep builder construction and data generation out of the measurement.
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		builder.Build(static, data)
	}
}

// buildMassInsertManual is a hand-written mass insert query builder for items
// with five dynamic values and the fixed static values "first", "second" and 3.
// It serves as the reference implementation that MassInsertBuilder.Build is
// compared against.
func buildMassInsertManual(data []AbstractMassInsertable[[5]any]) (string, []any) {
	const (
		staticCount         = 3
		rowWidth            = 5
		queryTemplate       = `INSERT INTO foo VALUES %s`
		placeholderTemplate = "($1, $2, $%d, $%d, $3, $%d, $%d, $%d)"
	)
	params := make([]any, staticCount+len(data)*rowWidth)
	params[0], params[1], params[2] = "first", "second", 3

	rows := make([]string, 0, len(data))
	// offset is the index in params where the current item's values start.
	offset := staticCount
	for _, item := range data {
		copy(params[offset:offset+rowWidth], item.Data[:])
		rows = append(rows, fmt.Sprintf(placeholderTemplate, offset+1, offset+2, offset+3, offset+4, offset+5))
		offset += rowWidth
	}
	return fmt.Sprintf(queryTemplate, strings.Join(rows, ", ")), params
}

// BenchmarkMassInsertBuilder_Build5x100_Manual measures the hand-written
// buildMassInsertManual with 100 items, as a baseline for the
// MassInsertBuilder benchmark.
func BenchmarkMassInsertBuilder_Build5x100_Manual(b *testing.B) {
	data := makeBenchmarkData[[5]any](100)
	// Keep data generation out of the measurement.
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		buildMassInsertManual(data)
	}
}
28 changes: 28 additions & 0 deletions exslices/chunk.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
// Copyright (c) 2024 Tulir Asokan
//
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at http://mozilla.org/MPL/2.0/.

package exslices

// Chunk splits a slice into consecutive chunks of at most the given size.
//
// All chunks except possibly the last one have exactly size elements. The
// chunks share memory with the input slice, but each chunk's capacity is
// clipped to its length, so appending to a chunk never overwrites other data.
// An empty input produces no chunks at all, which means callers looping over
// the result never have to deal with an empty chunk.
//
// Chunk panics if size is less than 1.
//
// Based on https://github.com/golang/go/issues/53987#issuecomment-1224367139
//
// TODO remove this after slices.Chunk can be used (it'll probably be added in Go 1.23, so it can be used after 1.22 is EOL)
func Chunk[T any](slice []T, size int) (chunks [][]T) {
	if size < 1 {
		panic("chunk size cannot be less than 1")
	}
	if len(slice) == 0 {
		return nil
	}
	// Ceiling division written in a way that can't overflow.
	chunks = make([][]T, 0, (len(slice)-1)/size+1)
	for len(slice) > size {
		chunks = append(chunks, slice[:size:size])
		slice = slice[size:]
	}
	return append(chunks, slice[:len(slice):len(slice)])
}

0 comments on commit e3ab497

Please sign in to comment.