Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

schemadiff: improved heuristic for dependent migration permutation evaluation time #14249

9 changes: 6 additions & 3 deletions go/vt/schemadiff/diff_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ limitations under the License.
package schemadiff

import (
"context"
"testing"

"github.com/stretchr/testify/assert"
Expand Down Expand Up @@ -403,6 +404,7 @@ func TestDiffViews(t *testing.T) {
}

func TestDiffSchemas(t *testing.T) {
ctx := context.Background()
tt := []struct {
name string
from string
Expand Down Expand Up @@ -806,7 +808,7 @@ func TestDiffSchemas(t *testing.T) {
} else {
assert.NoError(t, err)

diffs, err := diff.OrderedDiffs()
diffs, err := diff.OrderedDiffs(ctx)
assert.NoError(t, err)
statements := []string{}
cstatements := []string{}
Expand Down Expand Up @@ -858,6 +860,7 @@ func TestDiffSchemas(t *testing.T) {
}

func TestSchemaApplyError(t *testing.T) {
ctx := context.Background()
tt := []struct {
name string
from string
Expand Down Expand Up @@ -900,7 +903,7 @@ func TestSchemaApplyError(t *testing.T) {
{
diff, err := schema1.SchemaDiff(schema2, hints)
require.NoError(t, err)
diffs, err := diff.OrderedDiffs()
diffs, err := diff.OrderedDiffs(ctx)
assert.NoError(t, err)
assert.NotEmpty(t, diffs)
_, err = schema1.Apply(diffs)
Expand All @@ -911,7 +914,7 @@ func TestSchemaApplyError(t *testing.T) {
{
diff, err := schema2.SchemaDiff(schema1, hints)
require.NoError(t, err)
diffs, err := diff.OrderedDiffs()
diffs, err := diff.OrderedDiffs(ctx)
assert.NoError(t, err)
assert.NotEmpty(t, diffs, "schema1: %v, schema2: %v", schema1.ToSQL(), schema2.ToSQL())
_, err = schema2.Apply(diffs)
Expand Down
5 changes: 3 additions & 2 deletions go/vt/schemadiff/errors.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,9 @@ type ImpossibleApplyDiffOrderError struct {

func (e *ImpossibleApplyDiffOrderError) Error() string {
var b strings.Builder
b.WriteString("no valid applicable order for diffs. Diffs found conflicting:")
for _, s := range e.ConflictingStatements() {
conflictingStatements := e.ConflictingStatements()
b.WriteString(fmt.Sprintf("no valid applicable order for diffs. %d diffs found conflicting:", len(conflictingStatements)))
for _, s := range conflictingStatements {
b.WriteString("\n")
b.WriteString(s)
}
Expand Down
103 changes: 85 additions & 18 deletions go/vt/schemadiff/schema_diff.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,9 @@ limitations under the License.
package schemadiff

import (
"context"
"fmt"
"sort"

"vitess.io/vitess/go/mathutil"
)
Expand Down Expand Up @@ -68,6 +70,16 @@ func (d *DiffDependency) Type() DiffDependencyType {
return d.typ
}

// IsInOrder reports whether this dependency indicates a known order, i.e. whether
// its type is DiffDependencyInOrderCompletion or any higher-valued dependency type.
func (d *DiffDependency) IsInOrder() bool {
	hasKnownOrder := d.typ >= DiffDependencyInOrderCompletion
	return hasKnownOrder
}

// IsSequential reports whether this is a sequential dependency, i.e. whether its
// type is DiffDependencySequentialExecution or any higher-valued dependency type.
func (d *DiffDependency) IsSequential() bool {
	isSequential := d.typ >= DiffDependencySequentialExecution
	return isSequential
}

/*
The below is adapted from https://yourbasic.org/golang/generate-permutation-slice-string/
Licensed under https://creativecommons.org/licenses/by/3.0/
Expand All @@ -76,31 +88,74 @@ Modified to have an early break

// permutateDiffs calls `callback` with each permutation of a. If the function returns `true`, that means
// the callback has returned `true` for an early break, thus possibly not all permutations have been evaluated.
func permutateDiffs(a []EntityDiff, callback func([]EntityDiff) (earlyBreak bool)) (earlyBreak bool) {
if len(a) == 0 {
return false
func permutateDiffs(ctx context.Context, diffs []EntityDiff, callback func([]EntityDiff) (earlyBreak bool)) (earlyBreak bool, err error) {
if len(diffs) == 0 {
return false, nil
}
return permDiff(a, callback, 0)
// The diffs are expected to arrive sorted by a heuristic (DROPs first, ALTERs next, CREATEs last). This ordering is then used first in the permutation
// search and serves as seed for the rest of permutations.

return permDiff(ctx, diffs, callback, 0)
}

// permDiff is a recursive function to permutate given `a` and call `callback` for each permutation.
// If `callback` returns `true`, then so does this function, and this indicates a request for an early
// break, in which case this function will not be called again.
func permDiff(a []EntityDiff, callback func([]EntityDiff) (earlyBreak bool), i int) (earlyBreak bool) {
func permDiff(ctx context.Context, a []EntityDiff, callback func([]EntityDiff) (earlyBreak bool), i int) (earlyBreak bool, err error) {
if err := ctx.Err(); err != nil {
return true, err // early break
}
if i > len(a) {
return callback(a)
return callback(a), nil
}
if permDiff(a, callback, i+1) {
return true
if brk, err := permDiff(ctx, a, callback, i+1); brk {
return true, err
}
for j := i + 1; j < len(a); j++ {
// An optimization: we don't really need all possible permutations. We can skip some of the recursive search.
// We know we begin with a heuristic order where DROP VIEW comes first, then DROP TABLE, then ALTER TABLE & VIEW,
// then CREATE TABLE, then CREATE VIEW. And the entities in that initial order are sorted by dependency. That's
// thanks to Schema's UnorderedDiffs() existing heuristic.
// Now, some pairs of statements should be permutated, but some others will have absolutely no advantage to permutate.
// For example, a DROP VIEW and CREATE VIEW: there's no advantage to permutate the two. If the initial order is
// inapplicable, then so will be the permutated order.
// The next section identifies some no-brainer conditions for skipping swapping of elements.
// There could be even more fine-grained scenarios, which we can deal with in the future.
iIsCreateDropView := false
iIsTable := false
switch a[i].(type) {
case *DropViewEntityDiff, *CreateViewEntityDiff:
iIsCreateDropView = true
case *DropTableEntityDiff, *AlterTableEntityDiff, *CreateTableEntityDiff:
iIsTable = true
}

jIsCreateDropView := false
jIsTable := false
switch a[j].(type) {
case *DropViewEntityDiff, *CreateViewEntityDiff:
jIsCreateDropView = true
case *DropTableEntityDiff, *AlterTableEntityDiff, *CreateTableEntityDiff:
jIsTable = true
}

if iIsCreateDropView && jIsCreateDropView {
continue
}
if iIsCreateDropView && jIsTable {
continue
}
if iIsTable && jIsCreateDropView {
continue
}
// End of optimization
a[i], a[j] = a[j], a[i]
if permDiff(a, callback, i+1) {
return true
if brk, err := permDiff(ctx, a, callback, i+1); brk {
return true, err
}
a[i], a[j] = a[j], a[i]
}
return false
return false, nil
}

// SchemaDiff is a rich diff between two schemas. It includes the following:
Expand Down Expand Up @@ -232,10 +287,15 @@ func (d *SchemaDiff) HasSequentialExecutionDependencies() bool {

// OrderedDiffs returns the list of diff in applicable order, if possible. This is a linearized representation
// where diffs may be applied in-order one after another, keeping the schema in valid state at all times.
func (d *SchemaDiff) OrderedDiffs() ([]EntityDiff, error) {
lastGoodSchema := d.schema
func (d *SchemaDiff) OrderedDiffs(ctx context.Context) ([]EntityDiff, error) {
lastGoodSchema := d.schema.copy()
var orderedDiffs []EntityDiff
m := d.r.Map()

unorderedDiffsMap := map[string]int{}
for i, diff := range d.UnorderedDiffs() {
unorderedDiffsMap[diff.CanonicalStatementString()] = i
}
// The order of classes in the equivalence relation is, generally speaking, loyal to the order of original diffs.
for _, class := range d.r.OrderedClasses() {
classDiffs := []EntityDiff{}
Expand All @@ -247,15 +307,18 @@ func (d *SchemaDiff) OrderedDiffs() ([]EntityDiff, error) {
}
classDiffs = append(classDiffs, diff)
}
sort.SliceStable(classDiffs, func(i, j int) bool {
return unorderedDiffsMap[classDiffs[i].CanonicalStatementString()] < unorderedDiffsMap[classDiffs[j].CanonicalStatementString()]
})

// We will now permutate the diffs in this equivalence class, and hopefully find
// a valid permutation (one where if we apply the diffs in-order, the schema remains valid throughout the process)
foundValidPathForClass := permutateDiffs(classDiffs, func(permutatedDiffs []EntityDiff) bool {
permutationSchema := lastGoodSchema
foundValidPathForClass, err := permutateDiffs(ctx, classDiffs, func(permutatedDiffs []EntityDiff) bool {
permutationSchema := lastGoodSchema.copy()
// We want to apply the changes one by one, and validate the schema after each change
var err error
for i := range permutatedDiffs {
permutationSchema, err = permutationSchema.Apply(permutatedDiffs[i : i+1])
if err != nil {
// apply inline
if err := permutationSchema.apply(permutatedDiffs[i : i+1]); err != nil {
// permutation is invalid
return false // continue searching
}
Expand All @@ -265,13 +328,17 @@ func (d *SchemaDiff) OrderedDiffs() ([]EntityDiff, error) {
lastGoodSchema = permutationSchema
return true // early break! No need to keep searching
})
if err != nil {
return nil, err
}
if !foundValidPathForClass {
// In this equivalence class, there is no valid permutation. We cannot linearize the diffs.
return nil, &ImpossibleApplyDiffOrderError{
UnorderedDiffs: d.UnorderedDiffs(),
ConflictingDiffs: classDiffs,
}
}

// Done taking care of this equivalence class.
}
return orderedDiffs, nil
Expand Down
Loading
Loading