Skip to content

Commit

Permalink
schemadiff: fix diffing of textual columns with implicit charsets (#…
Browse files Browse the repository at this point in the history
…14930)

Signed-off-by: Shlomi Noach <[email protected]>
Signed-off-by: Dirkjan Bussink <[email protected]>
Co-authored-by: Dirkjan Bussink <[email protected]>
  • Loading branch information
shlomi-noach and dbussink authored Jan 12, 2024
1 parent 961a978 commit eddb39e
Show file tree
Hide file tree
Showing 48 changed files with 707 additions and 382 deletions.
2 changes: 1 addition & 1 deletion go/cmd/vtcombo/cli/plugin_grpcvtctldserver.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ import (
func init() {
servenv.OnRun(func() {
if servenv.GRPCCheckServiceMap("vtctld") {
grpcvtctldserver.StartServer(servenv.GRPCServer, ts, parser)
grpcvtctldserver.StartServer(servenv.GRPCServer, ts, collationEnv, parser)
}
})
}
5 changes: 3 additions & 2 deletions go/cmd/vtctl/vtctl.go
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,8 @@ func main() {
ctx, cancel := context.WithTimeout(context.Background(), waitTime)
installSignalHandlers(cancel)

collationEnv := collations.NewEnvironment(servenv.MySQLServerVersion())

parser, err := sqlparser.New(sqlparser.Options{
MySQLServerVersion: servenv.MySQLServerVersion(),
TruncateUILen: servenv.TruncateUILen,
Expand Down Expand Up @@ -164,7 +166,7 @@ func main() {
// New behavior. Strip off the prefix, and set things up to run through
// the vtctldclient command tree, using the localvtctldclient (in-process)
// client.
vtctld := grpcvtctldserver.NewVtctldServer(ts, parser)
vtctld := grpcvtctldserver.NewVtctldServer(ts, collationEnv, parser)
localvtctldclient.SetServer(vtctld)
command.VtctldClientProtocol = "local"

Expand All @@ -180,7 +182,6 @@ func main() {
fallthrough
default:
log.Warningf("WARNING: vtctl should only be used for VDiff v1 workflows. Please use VDiff v2 and consider using vtctldclient for all other commands.")
collationEnv := collations.NewEnvironment(servenv.MySQLServerVersion())
wr := wrangler.New(logutil.NewConsoleLogger(), ts, tmclient.NewTabletManagerClient(), collationEnv, parser)

if args[0] == "--" {
Expand Down
2 changes: 1 addition & 1 deletion go/cmd/vtctld/cli/plugin_grpcvtctldserver.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ import (
func init() {
servenv.OnRun(func() {
if servenv.GRPCCheckServiceMap("vtctld") {
grpcvtctldserver.StartServer(servenv.GRPCServer, ts, parser)
grpcvtctldserver.StartServer(servenv.GRPCServer, ts, collationEnv, parser)
}
})
}
7 changes: 5 additions & 2 deletions go/cmd/vtctldclient/command/root.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ import (

"github.com/spf13/cobra"

"vitess.io/vitess/go/mysql/collations"
"vitess.io/vitess/go/trace"
"vitess.io/vitess/go/vt/log"
"vitess.io/vitess/go/vt/logutil"
Expand Down Expand Up @@ -82,7 +83,8 @@ var (
actionTimeout time.Duration
compactOutput bool

parser *sqlparser.Parser
collationEnv *collations.Environment
parser *sqlparser.Parser

topoOptions = struct {
implementation string
Expand Down Expand Up @@ -212,7 +214,7 @@ func getClientForCommand(cmd *cobra.Command) (vtctldclient.VtctldClient, error)
return nil
})
})
vtctld := grpcvtctldserver.NewVtctldServer(ts, parser)
vtctld := grpcvtctldserver.NewVtctldServer(ts, collationEnv, parser)
localvtctldclient.SetServer(vtctld)
VtctldClientProtocol = "local"
server = ""
Expand All @@ -230,6 +232,7 @@ func init() {
Root.PersistentFlags().StringVar(&topoOptions.globalRoot, "topo-global-root", topoOptions.globalRoot, "the path of the global topology data in the global topology server")
vreplcommon.RegisterCommands(Root)

collationEnv = collations.NewEnvironment(servenv.MySQLServerVersion())
var err error
parser, err = sqlparser.New(sqlparser.Options{
MySQLServerVersion: servenv.MySQLServerVersion(),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ import (

"vitess.io/vitess/go/cmd/vtctldclient/command"
"vitess.io/vitess/go/cmd/vtctldclient/command/vreplication/common"
"vitess.io/vitess/go/mysql/collations"
"vitess.io/vitess/go/vt/sqlparser"
"vitess.io/vitess/go/vt/topo"
"vitess.io/vitess/go/vt/topo/memorytopo"
Expand Down Expand Up @@ -145,7 +146,7 @@ func SetupLocalVtctldClient(t *testing.T, ctx context.Context, cells ...string)
tmclient.RegisterTabletManagerClientFactory("grpc", func() tmclient.TabletManagerClient {
return nil
})
vtctld := grpcvtctldserver.NewVtctldServer(ts, sqlparser.NewTestParser())
vtctld := grpcvtctldserver.NewVtctldServer(ts, collations.MySQL8(), sqlparser.NewTestParser())
localvtctldclient.SetServer(vtctld)
command.VtctldClientProtocol = "local"
client, err := vtctldclient.New(command.VtctldClientProtocol, "")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ import (
"sync"
"testing"

"vitess.io/vitess/go/mysql/collations"
"vitess.io/vitess/go/sqltypes"
"vitess.io/vitess/go/vt/grpcclient"
"vitess.io/vitess/go/vt/sqlparser"
Expand Down Expand Up @@ -84,7 +85,7 @@ func newTestVDiffEnv(t testing.TB, ctx context.Context, sourceShards, targetShar
tabletType: topodatapb.TabletType_REPLICA,
tmc: newTestVDiffTMClient(),
}
env.ws = workflow.NewServer(env.topoServ, env.tmc, sqlparser.NewTestParser())
env.ws = workflow.NewServer(env.topoServ, env.tmc, collations.MySQL8(), sqlparser.NewTestParser())
env.tmc.testEnv = env

// Generate a unique dialer name.
Expand Down
20 changes: 10 additions & 10 deletions go/test/endtoend/schemadiff/vrepl/schemadiff_vrepl_suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -345,18 +345,19 @@ func ignoreAutoIncrement(t *testing.T, createTable string) string {

func validateDiff(t *testing.T, fromCreateTable string, toCreateTable string, allowSchemadiffNormalization bool, hints *schemadiff.DiffHints) {
// turn the "from" and "to" create statement strings (which we just read via SHOW CREATE TABLE into sqlparser.CreateTable statement)
fromStmt, err := sqlparser.NewTestParser().ParseStrictDDL(fromCreateTable)
env := schemadiff.NewTestEnv()
fromStmt, err := env.Parser.ParseStrictDDL(fromCreateTable)
require.NoError(t, err)
fromCreateTableStatement, ok := fromStmt.(*sqlparser.CreateTable)
require.True(t, ok)

toStmt, err := sqlparser.NewTestParser().ParseStrictDDL(toCreateTable)
toStmt, err := env.Parser.ParseStrictDDL(toCreateTable)
require.NoError(t, err)
toCreateTableStatement, ok := toStmt.(*sqlparser.CreateTable)
require.True(t, ok)

// The actual diff logic here!
diff, err := schemadiff.DiffTables(fromCreateTableStatement, toCreateTableStatement, hints)
diff, err := schemadiff.DiffTables(env, fromCreateTableStatement, toCreateTableStatement, hints)
assert.NoError(t, err)

// The diff can be empty or there can be an actual ALTER TABLE statement
Expand Down Expand Up @@ -385,7 +386,6 @@ func validateDiff(t *testing.T, fromCreateTable string, toCreateTable string, al
// the table generated by the test's own ALTER statement?

// But wait, there's caveats.

if toCreateTable != resultCreateTable {
// schemadiff's ALTER statement can normalize away CHARACTER SET and COLLATION definitions:
// when altering a column's CHARTSET&COLLATION into the table's values, schemadiff just strips the
Expand All @@ -394,20 +394,20 @@ func validateDiff(t *testing.T, fromCreateTable string, toCreateTable string, al
// structure is identical. And so we accept that there can be a normalization issue.
if allowSchemadiffNormalization {
{
stmt, err := sqlparser.NewTestParser().ParseStrictDDL(toCreateTable)
stmt, err := env.Parser.ParseStrictDDL(toCreateTable)
require.NoError(t, err)
createTableStatement, ok := stmt.(*sqlparser.CreateTable)
require.True(t, ok)
c, err := schemadiff.NewCreateTableEntity(createTableStatement)
c, err := schemadiff.NewCreateTableEntity(env, createTableStatement)
require.NoError(t, err)
toCreateTable = c.Create().CanonicalStatementString()
}
{
stmt, err := sqlparser.NewTestParser().ParseStrictDDL(resultCreateTable)
stmt, err := env.Parser.ParseStrictDDL(resultCreateTable)
require.NoError(t, err)
createTableStatement, ok := stmt.(*sqlparser.CreateTable)
require.True(t, ok)
c, err := schemadiff.NewCreateTableEntity(createTableStatement)
c, err := schemadiff.NewCreateTableEntity(env, createTableStatement)
require.NoError(t, err)
resultCreateTable = c.Create().CanonicalStatementString()
}
Expand All @@ -418,12 +418,12 @@ func validateDiff(t *testing.T, fromCreateTable string, toCreateTable string, al
assert.Equal(t, toCreateTable, resultCreateTable, "mismatched table structure. ALTER query was: %s", diffedAlterQuery)

// Also, let's see that our diff agrees there's no change:
resultStmt, err := sqlparser.NewTestParser().ParseStrictDDL(resultCreateTable)
resultStmt, err := env.Parser.ParseStrictDDL(resultCreateTable)
require.NoError(t, err)
resultCreateTableStatement, ok := resultStmt.(*sqlparser.CreateTable)
require.True(t, ok)

resultDiff, err := schemadiff.DiffTables(toCreateTableStatement, resultCreateTableStatement, hints)
resultDiff, err := schemadiff.DiffTables(env, toCreateTableStatement, resultCreateTableStatement, hints)
assert.NoError(t, err)
assert.Nil(t, resultDiff)
}
Expand Down
95 changes: 90 additions & 5 deletions go/vt/schemadiff/column.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,11 @@ package schemadiff
import (
"strings"

"vitess.io/vitess/go/mysql/collations"
"vitess.io/vitess/go/vt/sqlparser"
"vitess.io/vitess/go/vt/vterrors"

vtrpcpb "vitess.io/vitess/go/vt/proto/vtrpc"
)

// columnDetails decorates a column with more details, used by diffing logic
Expand Down Expand Up @@ -79,14 +83,95 @@ func NewColumnDefinitionEntity(c *sqlparser.ColumnDefinition) *ColumnDefinitionE

// ColumnDiff compares this table statement with another table statement, and sees what it takes to
// change this table to look like the other table.
// It returns an AlterTable statement if changes are found, or nil if not.
// the other table may be of different name; its name is ignored.
func (c *ColumnDefinitionEntity) ColumnDiff(other *ColumnDefinitionEntity, _ *DiffHints) *ModifyColumnDiff {
// It returns an ModifyColumnDiff statement if changes are found, or nil if not.
// The function also requires the charset/collate on the source & target tables. This is because the column's
// charset & collation, if undefined, are really defined by the table's charset & collation.
//
// Anecdotally, in CreateTableEntity.normalize() we actually actively strip away the charset/collate properties
// from the column definition, to get a cleaner table definition.
//
// Things get complicated when we consider hints.TableCharsetCollateStrategy. Consider this test case:
//
// from: "create table t (a varchar(64)) default charset=latin1",
// to: "create table t (a varchar(64) CHARACTER SET latin1 COLLATE latin1_bin)",
//
// In both cases, the column is really a latin1. But the tables themselves have different collations.
// We need to denormalize the column's charset/collate properties, so that the comparison can be done.
func (c *ColumnDefinitionEntity) ColumnDiff(
env *Environment,
other *ColumnDefinitionEntity,
t1cc *charsetCollate,
t2cc *charsetCollate,
) (*ModifyColumnDiff, error) {
if c.IsTextual() || other.IsTextual() {
// We will now denormalize the columns charset & collate as needed (if empty, populate from table.)

if c.columnDefinition.Type.Charset.Name == "" && c.columnDefinition.Type.Options.Collate != "" {
// Column has explicit collation but no charset. We can infer the charset from the collation.
collationID := env.CollationEnv.LookupByName(c.columnDefinition.Type.Options.Collate)
charset := env.CollationEnv.LookupCharsetName(collationID)
if charset == "" {
return nil, vterrors.Errorf(vtrpcpb.Code_INVALID_ARGUMENT, "cannot match charset to collation %v", c.columnDefinition.Type.Options.Collate)
}
defer func() {
c.columnDefinition.Type.Charset.Name = ""
}()
c.columnDefinition.Type.Charset.Name = charset
}
if c.columnDefinition.Type.Charset.Name == "" {
defer func() {
c.columnDefinition.Type.Charset.Name = ""
c.columnDefinition.Type.Options.Collate = ""
}()
c.columnDefinition.Type.Charset.Name = t1cc.charset
if c.columnDefinition.Type.Options.Collate == "" {
defer func() {
c.columnDefinition.Type.Options.Collate = ""
}()
c.columnDefinition.Type.Options.Collate = t1cc.collate
}
if c.columnDefinition.Type.Options.Collate = t1cc.collate; c.columnDefinition.Type.Options.Collate == "" {
collation := env.CollationEnv.DefaultCollationForCharset(t1cc.charset)
if collation == collations.Unknown {
return nil, vterrors.Errorf(vtrpcpb.Code_INVALID_ARGUMENT, "cannot match collation to charset %v", t1cc.charset)
}
c.columnDefinition.Type.Options.Collate = env.CollationEnv.LookupName(collation)
}
}
if other.columnDefinition.Type.Charset.Name == "" && other.columnDefinition.Type.Options.Collate != "" {
// Column has explicit collation but no charset. We can infer the charset from the collation.
collationID := env.CollationEnv.LookupByName(other.columnDefinition.Type.Options.Collate)
charset := env.CollationEnv.LookupCharsetName(collationID)
if charset == "" {
return nil, vterrors.Errorf(vtrpcpb.Code_INVALID_ARGUMENT, "cannot match charset to collation %v", other.columnDefinition.Type.Options.Collate)
}
defer func() {
other.columnDefinition.Type.Charset.Name = ""
}()
other.columnDefinition.Type.Charset.Name = charset
}

if other.columnDefinition.Type.Charset.Name == "" {
defer func() {
other.columnDefinition.Type.Charset.Name = ""
other.columnDefinition.Type.Options.Collate = ""
}()
other.columnDefinition.Type.Charset.Name = t2cc.charset
if other.columnDefinition.Type.Options.Collate = t2cc.collate; other.columnDefinition.Type.Options.Collate == "" {
collation := env.CollationEnv.DefaultCollationForCharset(t2cc.charset)
if collation == collations.Unknown {
return nil, vterrors.Errorf(vtrpcpb.Code_INVALID_ARGUMENT, "cannot match collation to charset %v", t2cc.charset)
}
other.columnDefinition.Type.Options.Collate = env.CollationEnv.LookupName(collation)
}
}
}

if sqlparser.Equals.RefOfColumnDefinition(c.columnDefinition, other.columnDefinition) {
return nil
return nil, nil
}

return NewModifyColumnDiffByDefinition(other.columnDefinition)
return NewModifyColumnDiffByDefinition(other.columnDefinition), nil
}

// IsTextual returns true when this column is of textual type, and is capable of having a character set property
Expand Down
Loading

0 comments on commit eddb39e

Please sign in to comment.