From 37da0481b34767d960921e710ae52fe5cd5bd63e Mon Sep 17 00:00:00 2001 From: Noble Mittal Date: Sun, 3 Mar 2024 21:30:00 +0530 Subject: [PATCH 1/3] Create convert_test.go and add tests later Signed-off-by: Noble Mittal --- go/mysql/collations/charset/convert_test.go | 23 +++++++++++++++++++++ 1 file changed, 23 insertions(+) create mode 100644 go/mysql/collations/charset/convert_test.go diff --git a/go/mysql/collations/charset/convert_test.go b/go/mysql/collations/charset/convert_test.go new file mode 100644 index 00000000000..5d1bf967f54 --- /dev/null +++ b/go/mysql/collations/charset/convert_test.go @@ -0,0 +1,23 @@ +/* +Copyright 2024 The Vitess Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package charset + +import "testing" + +func TestConvert(t *testing.T) { + +} From 8ee8e3452f2922007ef767583d4ca72c0b4ac258 Mon Sep 17 00:00:00 2001 From: Noble Mittal Date: Tue, 5 Mar 2024 00:54:23 +0530 Subject: [PATCH 2/3] WiP 2 Signed-off-by: Noble Mittal --- go/mysql/collations/charset/charset_test.go | 108 ++++++++++++++++++++ go/mysql/collations/charset/convert_test.go | 79 +++++++++++++- go/mysql/collations/charset/helpers_test.go | 71 +++++++++++++ 3 files changed, 257 insertions(+), 1 deletion(-) create mode 100644 go/mysql/collations/charset/charset_test.go create mode 100644 go/mysql/collations/charset/helpers_test.go diff --git a/go/mysql/collations/charset/charset_test.go b/go/mysql/collations/charset/charset_test.go new file mode 100644 index 00000000000..a961e37c967 --- /dev/null +++ b/go/mysql/collations/charset/charset_test.go @@ -0,0 +1,108 @@ +/* +Copyright 2024 The Vitess Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package charset + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestIsMultibyteByName(t *testing.T) { + testCases := []struct { + csname string + want bool + }{ + {"euckr", true}, + {"gb2312", true}, + {"sjis", true}, + {"cp932", true}, + {"eucjpms", true}, + {"ujis", true}, + {"utf16", false}, + {"latin1", false}, + {"binary", false}, + } + + for _, tc := range testCases { + t.Run(tc.csname, func(t *testing.T) { + assert.Equal(t, tc.want, IsMultibyteByName(tc.csname)) + }) + } +} + +func TestIsUnicode(t *testing.T) { + testCases := []struct { + cs Charset + want bool + }{ + {Charset_utf8mb3{}, true}, + {Charset_utf8mb4{}, true}, + {Charset_utf16{}, true}, + {Charset_utf16le{}, true}, + {Charset_ucs2{}, true}, + {Charset_utf32{}, true}, + {&testCharset1{}, false}, + } + + for _, tc := range testCases { + t.Run(tc.cs.Name(), func(t *testing.T) { + assert.Equal(t, tc.want, IsUnicode(tc.cs)) + }) + } +} + +func TestIsUnicodeByName(t *testing.T) { + testCases := []struct { + csname string + want bool + }{ + {"utf8", true}, + {"utf8mb3", true}, + {"utf8mb4", true}, + {"utf16", true}, + {"utf16le", true}, + {"ucs2", true}, + {"utf32", true}, + {"binary", false}, + } + + for _, tc := range testCases { + t.Run(tc.csname, func(t *testing.T) { + assert.Equal(t, tc.want, IsUnicodeByName(tc.csname)) + }) + } +} + +func TestIsBackslashSafe(t *testing.T) { + testCases := []struct { + cs Charset + want bool + }{ + {Charset_sjis{}, false}, + {Charset_cp932{}, false}, + {Charset_gb18030{}, false}, + {Charset_utf16le{}, true}, + {&testCharset1{}, true}, + } + + for _, tc := range testCases { + t.Run(tc.cs.Name(), func(t *testing.T) { + assert.Equal(t, tc.want, IsBackslashSafe(tc.cs)) + }) + } +} diff --git a/go/mysql/collations/charset/convert_test.go b/go/mysql/collations/charset/convert_test.go index 5d1bf967f54..6d737a91faf 100644 --- a/go/mysql/collations/charset/convert_test.go +++ b/go/mysql/collations/charset/convert_test.go @@ -16,8 +16,85 
@@ limitations under the License. package charset -import "testing" +import ( + "fmt" + "testing" + + "github.com/stretchr/testify/assert" +) + +// TODO: These types can be removed, use binary_charset instead. +type testCharset1 struct{} + +func (c *testCharset1) Name() string { + return "testCharset1" +} + +func (c *testCharset1) SupportsSupplementaryChars() bool { + return true +} + +func (c *testCharset1) IsSuperset(other Charset) bool { + return true +} + +func (c *testCharset1) MaxWidth() int { + return 1 +} + +func (c *testCharset1) EncodeRune([]byte, rune) int { + return 0 +} + +func (c *testCharset1) DecodeRune(bytes []byte) (rune, int) { + if len(bytes) < 1 { + return RuneError, 0 + } + return 1, 1 +} + +type testCharset2 struct{} + +func (c *testCharset2) Name() string { + return "testCharset2" +} + +func (c *testCharset2) SupportsSupplementaryChars() bool { + return true +} + +func (c *testCharset2) IsSuperset(other Charset) bool { + return false +} + +func (c *testCharset2) MaxWidth() int { + return 1 +} + +func (c *testCharset2) EncodeRune([]byte, rune) int { + return 0 +} + +func (c *testCharset2) DecodeRune([]byte) (rune, int) { + return 1, 1 +} func TestConvert(t *testing.T) { + dstCharset := &testCharset1{} + srcCharset := &testCharset2{} + src := []byte("src") + + res, err := Convert(nil, dstCharset, src, srcCharset) + assert.NoError(t, err) + assert.Equal(t, src, res) + + dst := []byte("dst") + res, err = Convert(dst, dstCharset, src, srcCharset) + assert.NoError(t, err) + assert.Equal(t, []byte("dstsrc"), res) + // TODO: Write more tests + res, err = Convert(nil, &testCharset2{}, src, &testCharset1{}) + assert.NoError(t, err) + fmt.Println(res) } diff --git a/go/mysql/collations/charset/helpers_test.go b/go/mysql/collations/charset/helpers_test.go new file mode 100644 index 00000000000..2e4e040e3e0 --- /dev/null +++ b/go/mysql/collations/charset/helpers_test.go @@ -0,0 +1,71 @@ +/* +Copyright 2024 The Vitess Authors. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package charset + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestSlice(t *testing.T) { + s := Slice(Charset_binary{}, []byte("testString"), 1, 4) + assert.Equal(t, []byte("est"), s) + + s = Slice(&testCharset1{}, []byte("testString"), 2, 5) + assert.Equal(t, []byte("stS"), s) + + s = Slice(&testCharset1{}, []byte("testString"), 2, 20) + assert.Equal(t, []byte("stString"), s) + + // Multibyte tests + s = Slice(Charset_utf8mb4{}, []byte("😊😂🤢"), 1, 3) + assert.Equal(t, []byte("😂🤢"), s) + + s = Slice(Charset_utf8mb4{}, []byte("😊😂🤢"), -2, 4) + assert.Equal(t, []byte("😊😂🤢"), s) +} + +func TestValidate(t *testing.T) { + // TODO: Add more tests + + in := "testString" + ok := Validate(Charset_binary{}, []byte(in)) + assert.True(t, ok, "'%s' should be validated from binary charset", in) + + ok = Validate(&testCharset1{}, nil) + assert.True(t, ok, "Validate should return true for empty string irrespective of charset") + + ok = Validate(&testCharset1{}, []byte(in)) + assert.True(t, ok) +} + +func TestLength(t *testing.T) { + in := "testString" + l := Length(Charset_binary{}, []byte(in)) + assert.Equal(t, 10, l) + + l = Length(&testCharset1{}, []byte(in)) + assert.Equal(t, 10, l) + + // Multibyte tests + l = Length(Charset_utf8mb4{}, []byte("😊😂🤢")) + assert.Equal(t, 3, l) + + l = Length(Charset_utf8mb4{}, []byte("한국어 시험")) + assert.Equal(t, 6, l) +} From 
e83323511340cfd563fef1ecd164805d5beed6b1 Mon Sep 17 00:00:00 2001 From: Noble Mittal Date: Mon, 11 Mar 2024 16:26:21 +0530 Subject: [PATCH 3/3] Add required tests for go/mysql/collations/charset Signed-off-by: Noble Mittal --- go/mysql/collations/charset/convert_test.go | 294 ++++++++++++++++++-- go/mysql/collations/charset/helpers_test.go | 102 ++++--- 2 files changed, 348 insertions(+), 48 deletions(-) diff --git a/go/mysql/collations/charset/convert_test.go b/go/mysql/collations/charset/convert_test.go index 6d737a91faf..df44f961743 100644 --- a/go/mysql/collations/charset/convert_test.go +++ b/go/mysql/collations/charset/convert_test.go @@ -17,13 +17,11 @@ limitations under the License. package charset import ( - "fmt" "testing" "github.com/stretchr/testify/assert" ) -// TODO: These types can be removed, use binary_charset instead. type testCharset1 struct{} func (c *testCharset1) Name() string { @@ -75,26 +73,290 @@ func (c *testCharset2) EncodeRune([]byte, rune) int { return 0 } -func (c *testCharset2) DecodeRune([]byte) (rune, int) { - return 1, 1 +func (c *testCharset2) DecodeRune(bytes []byte) (rune, int) { + if len(bytes) < 1 { + return RuneError, 0 + } + return rune(bytes[0]), 1 +} + +func (c *testCharset2) Convert(_, src []byte, from Charset) ([]byte, error) { + return src, nil } func TestConvert(t *testing.T) { - dstCharset := &testCharset1{} - srcCharset := &testCharset2{} - src := []byte("src") + testCases := []struct { + src []byte + srcCharset Charset + dst []byte + dstCharset Charset + want []byte + err string + }{ + { + src: []byte("testSrc"), + srcCharset: Charset_utf8mb3{}, + dst: []byte("testDst"), + dstCharset: Charset_utf8mb4{}, + want: []byte("testDsttestSrc"), + }, + { + src: []byte("testSrc"), + srcCharset: Charset_utf8mb3{}, + dst: nil, + dstCharset: Charset_utf8mb4{}, + want: []byte("testSrc"), + }, + { + src: []byte("testSrc"), + srcCharset: Charset_utf8mb4{}, + dst: nil, + dstCharset: Charset_utf8mb3{}, + want: []byte("testSrc"), + }, 
+ { + src: []byte("testSrc"), + srcCharset: Charset_utf8mb4{}, + dst: []byte("testDst"), + dstCharset: Charset_utf8mb3{}, + want: []byte("testDsttestSrc"), + }, + { + src: []byte("😊😂🤢"), + srcCharset: Charset_utf8mb4{}, + dst: []byte("testDst"), + dstCharset: Charset_utf8mb3{}, + want: []byte("testDst???"), + err: "Cannot convert string", + }, + { + src: []byte("testSrc"), + srcCharset: Charset_binary{}, + dst: []byte("testDst"), + dstCharset: Charset_utf8mb3{}, + want: []byte("testDsttestSrc"), + }, + { + src: []byte{00, 65, 00, 66}, + srcCharset: Charset_ucs2{}, + dst: []byte("testDst"), + dstCharset: Charset_utf8mb3{}, + want: []byte("testDstAB"), + }, + { + src: []byte{00, 65, 00, 66}, + srcCharset: Charset_ucs2{}, + dst: nil, + dstCharset: Charset_utf8mb3{}, + want: []byte("AB"), + }, + { + src: []byte("😊😂🤢"), + srcCharset: Charset_utf8mb3{}, + dst: nil, + dstCharset: &testCharset2{}, + want: []byte("😊😂🤢"), + }, + } - res, err := Convert(nil, dstCharset, src, srcCharset) - assert.NoError(t, err) - assert.Equal(t, src, res) + for _, tc := range testCases { + res, err := Convert(tc.dst, tc.dstCharset, tc.src, tc.srcCharset) + if tc.err != "" { + assert.ErrorContains(t, err, tc.err) + assert.Equal(t, tc.want, res) + } else { + assert.NoError(t, err) + assert.Equal(t, tc.want, res) + } + } +} + +func TestExpand(t *testing.T) { + testCases := []struct { + dst []rune + src []byte + srcCharset Charset + want []rune + }{ + { + dst: []rune("testDst"), + src: []byte("testSrc"), + srcCharset: Charset_utf8mb3{}, + want: []rune("testSrc"), + }, + { + dst: nil, + src: []byte("testSrc"), + srcCharset: Charset_utf8mb3{}, + want: []rune("testSrc"), + }, + { + dst: nil, + src: []byte("testSrc"), + srcCharset: Charset_binary{}, + want: []rune("testSrc"), + }, + { + dst: []rune("testDst"), + src: []byte("testSrc"), + srcCharset: Charset_binary{}, + want: []rune("testDsttestSrc"), + }, + { + dst: []rune("testDst"), + src: []byte{0, 0, 0, 0x41}, + 
srcCharset: Charset_utf32{}, + want: []rune("testDstA"), + }, + { + dst: nil, + src: []byte{0xFF}, + srcCharset: Charset_latin1{}, + want: []rune("ÿ"), + }, + // multibyte case + { + dst: []rune("testDst"), + src: []byte("😊😂🤢"), + srcCharset: Charset_utf8mb4{}, + want: []rune("😊😂🤢"), + }, + } + + for _, tc := range testCases { + res := Expand(tc.dst, tc.src, tc.srcCharset) + + assert.Equal(t, tc.want, res) + } +} + +func TestCollapse(t *testing.T) { + testCases := []struct { + dst []byte + src []rune + dstCharset Charset + want []byte + }{ + { + dst: []byte("testDst"), + src: []rune("testSrc"), + dstCharset: Charset_utf8mb3{}, + want: []byte("testDsttestSrc"), + }, + { + dst: nil, + src: []rune("testSrc"), + dstCharset: Charset_utf8mb3{}, + want: []byte("testSrc"), + }, + { + dst: []byte("testDst"), + src: []rune("testSrc"), + dstCharset: Charset_utf8mb4{}, + want: []byte("testDsttestSrc"), + }, + { + dst: []byte("testDst"), + src: []rune("testSrc"), + dstCharset: Charset_binary{}, + want: []byte("testDsttestSrc"), + }, + { + dst: nil, + src: []rune("testSrc"), + dstCharset: Charset_binary{}, + want: []byte("testSrc"), + }, + { + dst: []byte("dst"), + src: []rune("src"), + dstCharset: Charset_ucs2{}, + want: []byte{100, 115, 116, 0, 115, 0, 114, 0, 99}, + }, + { + dst: nil, + src: []rune("src"), + dstCharset: Charset_ucs2{}, + want: []byte{0, 115, 0, 114, 0, 99}, + }, + // unsupported encoding case + { + dst: nil, + src: []rune{0xffff1}, + dstCharset: Charset_ucs2{}, + want: []byte{0, 63}, + }, + } + + for _, tc := range testCases { + res := Collapse(tc.dst, tc.src, tc.dstCharset) + + assert.Equal(t, tc.want, res) + } +} + +func TestConvertFromUTF8(t *testing.T) { dst := []byte("dst") - res, err = Convert(dst, dstCharset, src, srcCharset) - assert.NoError(t, err) - assert.Equal(t, []byte("dstsrc"), res) + src := []byte("😊😂🤢") - // TODO: Write more tests - res, err = Convert(nil, &testCharset2{}, src, &testCharset1{}) + res, err := 
ConvertFromUTF8(dst, Charset_utf8mb4{}, src) assert.NoError(t, err) - fmt.Println(res) + assert.Equal(t, []byte("dst😊😂🤢"), res) + + res, err = ConvertFromUTF8(dst, Charset_utf8mb3{}, src) + assert.ErrorContains(t, err, "Cannot convert string") + assert.Equal(t, []byte("dst???"), res) +} + +func TestConvertFromBinary(t *testing.T) { + testCases := []struct { + dst []byte + cs Charset + in []byte + want []byte + err string + }{ + { + dst: []byte("testDst"), + cs: Charset_utf8mb4{}, + in: []byte("testString"), + want: []byte("testDsttestString"), + }, + { + cs: Charset_utf16le{}, + in: []byte("testForOddLen"), + want: append([]byte{0}, []byte("testForOddLen")...), + }, + { + cs: Charset_utf16{}, + in: []byte("testForEvenLen"), + want: []byte("testForEvenLen"), + }, + // multibyte case + { + dst: []byte("testDst"), + cs: Charset_utf8mb4{}, + in: []byte("😊😂🤢"), + want: []byte("testDst😊😂🤢"), + }, + // unsupported encoding case + { + cs: Charset_utf32{}, + in: []byte{0xff}, + err: "Cannot convert string", + }, + } + + for _, tc := range testCases { + got, err := ConvertFromBinary(tc.dst, tc.cs, tc.in) + + if tc.want == nil { + assert.ErrorContains(t, err, tc.err) + assert.Nil(t, got) + } else { + assert.NoError(t, err) + assert.Equal(t, tc.want, got) + } + } } diff --git a/go/mysql/collations/charset/helpers_test.go b/go/mysql/collations/charset/helpers_test.go index 2e4e040e3e0..4f8d367e880 100644 --- a/go/mysql/collations/charset/helpers_test.go +++ b/go/mysql/collations/charset/helpers_test.go @@ -23,49 +23,87 @@ import ( ) func TestSlice(t *testing.T) { - s := Slice(Charset_binary{}, []byte("testString"), 1, 4) - assert.Equal(t, []byte("est"), s) - - s = Slice(&testCharset1{}, []byte("testString"), 2, 5) - assert.Equal(t, []byte("stS"), s) - - s = Slice(&testCharset1{}, []byte("testString"), 2, 20) - assert.Equal(t, []byte("stString"), s) - - // Multibyte tests - s = Slice(Charset_utf8mb4{}, []byte("😊😂🤢"), 1, 3) - assert.Equal(t, 
[]byte("😂🤢"), s) - - s = Slice(Charset_utf8mb4{}, []byte("😊😂🤢"), -2, 4) - assert.Equal(t, []byte("😊😂🤢"), s) + testCases := []struct { + in []byte + cs Charset + from int + to int + want []byte + }{ + { + in: []byte("testString"), + cs: Charset_binary{}, + from: 1, + to: 4, + want: []byte("est"), + }, + { + in: []byte("testString"), + cs: &testCharset1{}, + from: 2, + to: 5, + want: []byte("stS"), + }, + { + in: []byte("testString"), + cs: &testCharset1{}, + from: 2, + to: 20, + want: []byte("stString"), + }, + // Multibyte cases + { + in: []byte("😊😂🤢"), + cs: Charset_utf8mb4{}, + from: 1, + to: 3, + want: []byte("😂🤢"), + }, + { + in: []byte("😊😂🤢"), + cs: Charset_utf8mb4{}, + from: -2, + to: 4, + want: []byte("😊😂🤢"), + }, + } + + for _, tc := range testCases { + s := Slice(tc.cs, tc.in, tc.from, tc.to) + assert.Equal(t, tc.want, s) + } } func TestValidate(t *testing.T) { - // TODO: Add more tests - in := "testString" ok := Validate(Charset_binary{}, []byte(in)) - assert.True(t, ok, "'%s' should be validated from binary charset", in) + assert.True(t, ok, "%q should be valid for binary charset", in) ok = Validate(&testCharset1{}, nil) assert.True(t, ok, "Validate should return true for empty string irrespective of charset") ok = Validate(&testCharset1{}, []byte(in)) - assert.True(t, ok) + assert.True(t, ok, "%q should be valid for testCharset1", in) + + ok = Validate(Charset_utf16le{}, []byte{0x41}) + assert.False(t, ok, "%v should not be valid for utf16le charset", []byte{0x41}) } func TestLength(t *testing.T) { - in := "testString" - l := Length(Charset_binary{}, []byte(in)) - assert.Equal(t, 10, l) - - l = Length(&testCharset1{}, []byte(in)) - assert.Equal(t, 10, l) - - // Multibyte tests - l = Length(Charset_utf8mb4{}, []byte("😊😂🤢")) - assert.Equal(t, 3, l) - - l = Length(Charset_utf8mb4{}, []byte("한국어 시험")) - assert.Equal(t, 6, l) + testCases := []struct { + in []byte + cs Charset + want 
int + }{ + {[]byte("testString"), Charset_binary{}, 10}, + {[]byte("testString"), &testCharset1{}, 10}, + // Multibyte cases + {[]byte("😊😂🤢"), Charset_utf8mb4{}, 3}, + {[]byte("한국어 시험"), Charset_utf8mb4{}, 6}, + } + + for _, tc := range testCases { + l := Length(tc.cs, tc.in) + assert.Equal(t, tc.want, l) + } }