-
Notifications
You must be signed in to change notification settings - Fork 2.1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
VReplication: use proper column collations in vstreamer #15313
Merged
Merged
Changes from 9 commits
Commits
Show all changes
12 commits
Select commit
Hold shift + click to select a range
04340e4
Add parsing for collations in binlog_row_metadata
mattlord 9963933
Update tests
mattlord cc08784
Fixes
mattlord 6b30708
More adjustments. Skip DDL test while I investigate
mattlord c3a2b42
Fix unit test
mattlord f515078
Corrections in logic and handling
mattlord 0f4a3b7
Update/correct another unit test
mattlord 46765af
Correctly handle 5.7 in unit tests
mattlord f1971b2
Get rid of cruft from earlier parsing work
mattlord 8f6f957
Minor changes after self review
mattlord a77d394
Remove debug log lines and minor nits
mattlord 2a0ad87
Merge remote-tracking branch 'origin/main' into vstream_col_colls
mattlord File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -20,10 +20,28 @@ | |
"encoding/binary" | ||
|
||
"vitess.io/vitess/go/mysql/binlog" | ||
"vitess.io/vitess/go/vt/proto/vtrpc" | ||
"vitess.io/vitess/go/mysql/collations" | ||
"vitess.io/vitess/go/vt/vterrors" | ||
|
||
querypb "vitess.io/vitess/go/vt/proto/query" | ||
vtrpcpb "vitess.io/vitess/go/vt/proto/vtrpc" | ||
) | ||
|
||
// These are the TABLE_MAP_EVENT's optional metadata field types from: libbinlogevents/include/rows_event.h
// See: https://dev.mysql.com/doc/dev/mysql-server/8.0.34/structbinary__log_1_1Table__map__event_1_1Optional__metadata__fields.html
//
// The values are the one-byte type tags that prefix each optional metadata
// field. They start at 1 and must stay in this order so that they keep
// matching the numbering used by the MySQL server.
const (
	// TableMapSignedness is the SIGNEDNESS field type (1).
	TableMapSignedness uint8 = iota + 1
	// TableMapDefaultCharset is the DEFAULT_CHARSET field type (2).
	TableMapDefaultCharset
	// TableMapColumnCharset is the COLUMN_CHARSET field type (3).
	TableMapColumnCharset
	// TableMapColumnName is the COLUMN_NAME field type (4).
	TableMapColumnName
	// TableMapSetStrValue is the SET_STR_VALUE field type (5).
	TableMapSetStrValue
	// TableMapEnumStrValue is the ENUM_STR_VALUE field type (6).
	TableMapEnumStrValue
	// TableMapGeometryType is the GEOMETRY_TYPE field type (7).
	TableMapGeometryType
	// TableMapSimplePrimaryKey is the SIMPLE_PRIMARY_KEY field type (8).
	TableMapSimplePrimaryKey
	// TableMapPrimaryKeyWithPrefix is the PRIMARY_KEY_WITH_PREFIX field type (9).
	TableMapPrimaryKeyWithPrefix
	// TableMapEnumAndSetDefaultCharset is the ENUM_AND_SET_DEFAULT_CHARSET field type (10).
	TableMapEnumAndSetDefaultCharset
	// TableMapEnumAndSetColumnCharset is the ENUM_AND_SET_COLUMN_CHARSET field type (11).
	TableMapEnumAndSetColumnCharset
	// TableMapColumnVisibility is the COLUMN_VISIBILITY field type (12).
	TableMapColumnVisibility
)
|
||
// TableMap implements BinlogEvent.TableMap(). | ||
|
@@ -43,6 +61,7 @@ | |
// cc column-def, one byte per column | ||
// <var> column-meta-def (var-len encoded string) | ||
// n NULL-bitmask, length: (cc + 7) / 8 | ||
// n Optional Metadata | ||
func (ev binlogEvent) TableMap(f BinlogFormat) (*TableMap, error) { | ||
data := ev.Bytes()[f.HeaderLength:] | ||
|
||
|
@@ -64,7 +83,7 @@ | |
|
||
columnCount, read, ok := readLenEncInt(data, pos) | ||
if !ok { | ||
return nil, vterrors.Errorf(vtrpc.Code_INTERNAL, "expected column count at position %v (data=%v)", pos, data) | ||
return nil, vterrors.Errorf(vtrpcpb.Code_INTERNAL, "expected column count at position %v (data=%v)", pos, data) | ||
} | ||
pos = read | ||
|
||
|
@@ -73,7 +92,7 @@ | |
|
||
metaLen, read, ok := readLenEncInt(data, pos) | ||
if !ok { | ||
return nil, vterrors.Errorf(vtrpc.Code_INTERNAL, "expected metadata length at position %v (data=%v)", pos, data) | ||
return nil, vterrors.Errorf(vtrpcpb.Code_INTERNAL, "expected metadata length at position %v (data=%v)", pos, data) | ||
} | ||
pos = read | ||
|
||
|
@@ -88,11 +107,20 @@ | |
} | ||
} | ||
if pos != expectedEnd { | ||
return nil, vterrors.Errorf(vtrpc.Code_INTERNAL, "unexpected metadata end: got %v was expecting %v (data=%v)", pos, expectedEnd, data) | ||
return nil, vterrors.Errorf(vtrpcpb.Code_INTERNAL, "unexpected metadata end: got %v was expecting %v (data=%v)", pos, expectedEnd, data) | ||
} | ||
|
||
// A bit array that says if each column can be NULL. | ||
result.CanBeNull, _ = newBitmap(data, pos, int(columnCount)) | ||
result.CanBeNull, read = newBitmap(data, pos, int(columnCount)) | ||
pos = read | ||
|
||
// Read any text based column collation values provided in the optional metadata. | ||
//log.Errorf("DEBUG: Remaining optional metadata bytes for %s: %v", result.Name, data[pos:]) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. All of these DEBUG logging statements will be removed before merging. |
||
var err error | ||
if result.ColumnCollationIDs, err = readColumnCollationIDs(data, pos, int(columnCount)); err != nil { | ||
return nil, err | ||
} | ||
//log.Errorf("DEBUG: table %s; ColumnCollationIDs: %+v", result.Name, result.ColumnCollationIDs) | ||
|
||
return result, nil | ||
} | ||
|
@@ -118,7 +146,7 @@ | |
|
||
default: | ||
// Unknown type. This is used in tests only, so panic. | ||
panic(vterrors.Errorf(vtrpc.Code_INTERNAL, "metadataLength: unhandled data type: %v", typ)) | ||
panic(vterrors.Errorf(vtrpcpb.Code_INTERNAL, "metadataLength: unhandled data type: %v", typ)) | ||
} | ||
} | ||
|
||
|
@@ -154,7 +182,7 @@ | |
|
||
default: | ||
// Unknown types, we can't go on. | ||
return 0, 0, vterrors.Errorf(vtrpc.Code_INTERNAL, "metadataRead: unhandled data type: %v", typ) | ||
return 0, 0, vterrors.Errorf(vtrpcpb.Code_INTERNAL, "metadataRead: unhandled data type: %v", typ) | ||
} | ||
} | ||
|
||
|
@@ -185,8 +213,46 @@ | |
|
||
default: | ||
// Unknown type. This is used in tests only, so panic. | ||
panic(vterrors.Errorf(vtrpc.Code_INTERNAL, "metadataRead: unhandled data type: %v", typ)) | ||
panic(vterrors.Errorf(vtrpcpb.Code_INTERNAL, "metadataRead: unhandled data type: %v", typ)) | ||
} | ||
} | ||
|
||
// readColumnCollationIDs reads from the optional metadata that exists. | ||
// See: https://github.com/mysql/mysql-server/blob/8.0/libbinlogevents/include/rows_event.h | ||
// What's included depends on the server configuration: | ||
// https://dev.mysql.com/doc/refman/en/replication-options-binary-log.html#sysvar_binlog_row_metadata | ||
// and the table definition. | ||
// We only care about the collation IDs of the text based columns and | ||
// this info is provided in all binlog_row_metadata formats. | ||
func readColumnCollationIDs(data []byte, pos, count int) ([]collations.ID, error) { | ||
collationIDs := make([]collations.ID, 0, count) | ||
for pos < len(data) { | ||
fieldType := uint8(data[pos]) | ||
pos++ | ||
|
||
fieldLen, read, ok := readLenEncInt(data, pos) | ||
if !ok || read+int(fieldLen) > len(data) { | ||
return nil, vterrors.New(vtrpcpb.Code_INTERNAL, "error reading optional metadata field length") | ||
} | ||
pos = read | ||
|
||
fieldVal := data[pos : pos+int(fieldLen)] | ||
pos += int(fieldLen) | ||
|
||
//log.Errorf("DEBUG: Optional Metadata Field Type: %v, Length: %v, Value: %v", fieldType, fieldLen, fieldVal) | ||
if fieldType == TableMapDefaultCharset || fieldType == TableMapColumnCharset { // It's one or the other | ||
for i := uint64(0); i < fieldLen; i++ { | ||
v := uint16(fieldVal[i]) | ||
if v == 252 { // The ID is the subsequent 2 bytes | ||
v = binary.LittleEndian.Uint16(fieldVal[i+1 : i+3]) | ||
i += 2 | ||
} | ||
collationIDs = append(collationIDs, collations.ID(v)) | ||
//log.Errorf("DEBUG: charset idx %d: %v", i, v) | ||
} | ||
} | ||
} | ||
return collationIDs, nil | ||
} | ||
|
||
// Rows implements BinlogEvent.Rows(). | ||
|
@@ -235,7 +301,7 @@ | |
|
||
columnCount, read, ok := readLenEncInt(data, pos) | ||
if !ok { | ||
return result, vterrors.Errorf(vtrpc.Code_INTERNAL, "expected column count at position %v (data=%v)", pos, data) | ||
return result, vterrors.Errorf(vtrpcpb.Code_INTERNAL, "expected column count at position %v (data=%v)", pos, data) | ||
} | ||
pos = read | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think a lot of the code is going to be easier to write and easier to follow if this slice has one entry for each column and you simply fill the non-text columns with a Binary collation.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Soooo... I agree! But unfortunately I'd have to perform the same logic to fill in those gaps as the binlog row metadata does not include the column's index or name. I'll think about it some more though.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I explored this path, but we don't have access to the collation ENV in order to properly fill in the gaps here. So leaving it as-is for now, where users of this (vstreamer) have access to both the collation ENV and mysqld.