Skip to content

Commit

Permalink
evalengine: Implement BIN, OCT & CHAR functions (#15226)
Browse files Browse the repository at this point in the history
Signed-off-by: Dirkjan Bussink <[email protected]>
  • Loading branch information
dbussink authored Feb 15, 2024
1 parent 09c3d56 commit 532f767
Show file tree
Hide file tree
Showing 8 changed files with 317 additions and 6 deletions.
45 changes: 41 additions & 4 deletions go/mysql/fastparse/fastparse.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,19 @@ import (
"vitess.io/vitess/go/hack"
)

// ParseUint64 parses s as a uint64 in the given base.
//
// It delegates to parseUint64 with allowNeg set to false, so an input
// carrying a '-' sign is rejected with an error instead of being converted.
func ParseUint64(s string, base int) (uint64, error) {
return parseUint64(s, base, false)
}

// ParseUint64WithNeg parses s as a uint64 in the given base, additionally
// accepting a '-' sign in front of the digits.
//
// It delegates to parseUint64 with allowNeg set to true; for a negative
// input the parsed magnitude is returned as its two's complement value,
// mirroring how MySQL treats such input.
func ParseUint64WithNeg(s string, base int) (uint64, error) {
return parseUint64(s, base, true)
}

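// parseUint64 parses uint64 from s. When allowNeg is true, a '-' sign before
// the digits is accepted and the result is the two's complement of the
// parsed magnitude; when it is false, such input is an error.
//
// For unsigned input it is equivalent to strconv.ParseUint(s, base, 64) in case it succeeds,
// but on error it will return the best effort value of what it has parsed so far.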
func ParseUint64(s string, base int) (uint64, error) {
func parseUint64(s string, base int, allowNeg bool) (uint64, error) {
if len(s) == 0 {
return 0, fmt.Errorf("cannot parse uint64 from empty string")
}
Expand All @@ -45,6 +53,22 @@ func ParseUint64(s string, base int) (uint64, error) {
i++
}

if i >= uint(len(s)) {
return 0, fmt.Errorf("cannot parse uint64 from %q", s)
}
// For some reason, MySQL parses things as uint64 even with
// a negative sign and then turns it into the 2s complement value.
minus := s[i] == '-'
if minus {
if !allowNeg {
return 0, fmt.Errorf("cannot parse uint64 from %q", s)
}
i++
if i >= uint(len(s)) {
return 0, fmt.Errorf("cannot parse uint64 from %q", s)
}
}

d := uint64(0)
j := i
next:
Expand Down Expand Up @@ -75,17 +99,23 @@ next:
cutoff = math.MaxUint64/uint64(base) + 1
}
if d >= cutoff {
if minus {
return 0, fmt.Errorf("cannot parse uint64 from %q: %w", s, ErrOverflow)
}
return math.MaxUint64, fmt.Errorf("cannot parse uint64 from %q: %w", s, ErrOverflow)
}
v := d*uint64(base) + uint64(b)
if v < d {
if minus {
return 0, fmt.Errorf("cannot parse uint64 from %q: %w", s, ErrOverflow)
}
return math.MaxUint64, fmt.Errorf("cannot parse uint64 from %q: %w", s, ErrOverflow)
}
d = v
i++
}
if i <= j {
return d, fmt.Errorf("cannot parse uint64 from %q", s)
return uValue(d, minus), fmt.Errorf("cannot parse uint64 from %q", s)
}

for i < uint(len(s)) {
Expand All @@ -97,9 +127,9 @@ next:

if i < uint(len(s)) {
// Unparsed tail left.
return d, fmt.Errorf("unparsed tail left after parsing uint64 from %q: %q", s, s[i:])
return uValue(d, minus), fmt.Errorf("unparsed tail left after parsing uint64 from %q: %q", s, s[i:])
}
return d, nil
return uValue(d, minus), nil
}

// ErrOverflow is the sentinel error wrapped (with %w) into the errors returned
// when the parsed input overflows; callers can detect it with errors.Is.
var ErrOverflow = errors.New("overflow")
Expand Down Expand Up @@ -261,3 +291,10 @@ func isSpace(c byte) bool {
return false
}
}

// uValue returns v unchanged when neg is false. When neg is true it returns
// the two's complement negation of v, i.e. (2^64 - v) modulo 2^64.
func uValue(v uint64, neg bool) uint64 {
	if !neg {
		return v
	}
	// Invert-and-increment is the two's complement negation, identical to -v
	// for unsigned operands (wrapping, so 0 stays 0).
	return ^v + 1
}
66 changes: 66 additions & 0 deletions go/mysql/fastparse/fastparse_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -334,6 +334,17 @@ func TestParseUint64(t *testing.T) {
base: 2,
expected: 1,
},
{
input: "-",
base: 10,
expected: 0,
err: `cannot parse uint64 from "-"`,
},
{
input: "-1",
base: 10,
err: `cannot parse uint64 from "-1"`,
},
{
input: "10",
base: 2,
Expand Down Expand Up @@ -478,6 +489,61 @@ func TestParseUint64(t *testing.T) {
}
}

// TestParseUint64WithNeg checks ParseUint64WithNeg on negative inputs: a lone
// sign, values that wrap to their two's complement, and magnitudes that
// overflow uint64 and must report an overflow error with a zero value.
func TestParseUint64WithNeg(t *testing.T) {
	cases := []struct {
		input    string
		base     int
		expected uint64
		err      string
	}{
		{input: "-", base: 10, expected: 0, err: `cannot parse uint64 from "-"`},
		{input: "-1", base: 10, expected: 18446744073709551615},
		{input: "-9223372036854775808", base: 10, expected: 9223372036854775808},
		{input: "-9223372036854775809", base: 10, expected: 9223372036854775807},
		{
			input:    "-18446744073709551616",
			base:     10,
			expected: 0,
			err:      `cannot parse uint64 from "-18446744073709551616": overflow`,
		},
		{
			input:    "-31415926535897932384",
			base:     10,
			expected: 0,
			err:      `cannot parse uint64 from "-31415926535897932384": overflow`,
		},
	}
	for _, tt := range cases {
		t.Run(tt.input, func(t *testing.T) {
			got, err := ParseUint64WithNeg(tt.input, tt.base)
			if tt.err != "" {
				// Failure cases still pin the returned value before the message.
				require.Equal(t, tt.expected, got)
				require.EqualError(t, err, tt.err)
				return
			}
			require.NoError(t, err)
			require.Equal(t, tt.expected, got)
		})
	}
}

func TestParseFloat64(t *testing.T) {
testcases := []struct {
input string
Expand Down
12 changes: 12 additions & 0 deletions go/vt/vtgate/evalengine/cached_size.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

25 changes: 24 additions & 1 deletion go/vt/vtgate/evalengine/compiler_asm.go
Original file line number Diff line number Diff line change
Expand Up @@ -2008,7 +2008,7 @@ func (asm *assembler) Fn_CONV_bu(offset int, baseOffset int) {
i, err := fastparse.ParseInt64(arg.string(), int(base.i))
u = uint64(i)
if errors.Is(err, fastparse.ErrOverflow) {
u, _ = fastparse.ParseUint64(arg.string(), int(base.i))
u, _ = fastparse.ParseUint64WithNeg(arg.string(), int(base.i))
}
env.vm.stack[env.vm.sp-offset] = env.vm.arena.newEvalUint64(u)
return 1
Expand Down Expand Up @@ -4206,6 +4206,29 @@ func (asm *assembler) Fn_CONCAT_WS(tt querypb.Type, tc collations.TypedCollation
}, "FN CONCAT_WS VARCHAR(SP-1) VARCHAR(SP-2)...VARCHAR(SP-N)")
}

// Fn_CHAR emits the instruction that implements CHAR over the top `args`
// stack slots.
//
// At run time each non-nil slot is read as an *evalInt64, truncated to uint32
// and appended to a byte buffer via encodeChar; nil slots are skipped. The
// buffer is validated against the charset of tc.Collation: when valid, the
// result is stored as a raw value of type tt with collation tc in the slot of
// the first argument, otherwise that slot is set to nil. The stack pointer is
// then lowered so only the result slot remains.
func (asm *assembler) Fn_CHAR(tt querypb.Type, tc collations.TypedCollation, args int) {
	cs := colldata.Lookup(tc.Collation).Charset()
	asm.adjustStack(-(args - 1))
	asm.emit(func(env *ExpressionEnv) int {
		// first is the slot of the first argument; it also receives the result.
		first := env.vm.sp - args
		out := make([]byte, 0, args)
		for n := 0; n < args; n++ {
			if slot := env.vm.stack[first+n]; slot != nil {
				out = encodeChar(out, uint32(slot.(*evalInt64).i))
			}
		}

		if !charset.Validate(cs, out) {
			env.vm.stack[first] = nil
		} else {
			env.vm.stack[first] = env.vm.arena.newEvalRaw(out, tt, tc)
		}
		env.vm.sp -= args - 1
		return 1
	}, "FN CHAR INT64(SP-1) INT64(SP-2)...INT64(SP-N)")
}

func (asm *assembler) Fn_BIN_TO_UUID0(col collations.TypedCollation) {
asm.emit(func(env *ExpressionEnv) int {
arg := env.vm.stack[env.vm.sp-1].(*evalBytes)
Expand Down
2 changes: 1 addition & 1 deletion go/vt/vtgate/evalengine/fn_numeric.go
Original file line number Diff line number Diff line change
Expand Up @@ -1332,7 +1332,7 @@ func (call *builtinConv) eval(env *ExpressionEnv) (eval, error) {
i, err := fastparse.ParseInt64(nStr.string(), int(fromBase))
u = uint64(i)
if errors.Is(err, fastparse.ErrOverflow) {
u, _ = fastparse.ParseUint64(nStr.string(), int(fromBase))
u, _ = fastparse.ParseUint64WithNeg(nStr.string(), int(fromBase))
}
}

Expand Down
84 changes: 84 additions & 0 deletions go/vt/vtgate/evalengine/fn_string.go
Original file line number Diff line number Diff line change
Expand Up @@ -1643,3 +1643,87 @@ func (call *builtinConcatWs) compile(c *compiler) (ctype, error) {

return ctype{Type: tt, Flag: args[0].Flag, Col: tc}, nil
}

// builtinChar is the expression node for the CHAR function: it turns its
// integer arguments into bytes (see encodeChar) and returns them as a binary
// or text value depending on collate.
type builtinChar struct {
CallExpr
// collate is the collation of the result. collations.CollationBinaryID
// yields a binary result; any other value yields text that must be valid
// in that collation's charset.
collate collations.ID
}

// Compile-time assertion that *builtinChar satisfies the IR interface.
var _ IR = (*builtinChar)(nil)

// eval evaluates every argument, drops the nil results, converts each
// remaining value to an int64 truncated to uint32 and encodes it into bytes
// with encodeChar.
//
// With the binary collation the bytes are returned as a binary value. For any
// other collation the bytes must validate against that collation's charset:
// if they do not, a nil value is returned (with a nil error); otherwise they
// are returned as text with that collation. The first argument evaluation
// error is returned as is.
func (call *builtinChar) eval(env *ExpressionEnv) (eval, error) {
	// Phase 1: evaluate all arguments up front, keeping only non-nil values.
	nonNull := make([]eval, 0, len(call.Arguments))
	for _, expr := range call.Arguments {
		v, err := expr.eval(env)
		if err != nil {
			return nil, err
		}
		if v != nil {
			nonNull = append(nonNull, v)
		}
	}

	// Phase 2: encode each value into its byte representation.
	out := make([]byte, 0, len(nonNull))
	for i := range nonNull {
		out = encodeChar(out, uint32(evalToInt64(nonNull[i]).i))
	}

	switch {
	case call.collate == collations.CollationBinaryID:
		return newEvalBinary(out), nil
	case !charset.Validate(colldata.Lookup(call.collate).Charset(), out):
		return nil, nil
	}

	col := collations.TypedCollation{
		Collation:    call.collate,
		Coercibility: collations.CoerceCoercible,
		Repertoire:   collations.RepertoireASCII,
	}
	return newEvalText(out, col), nil
}

// compile compiles every argument in order, emitting after each one a null
// check followed by a conversion to a signed integer (Convert_ui for Uint64,
// Convert_xi for anything that is not already Int64), with the null check
// jumping past the conversion. It then emits Fn_CHAR over all arguments.
//
// The result type is VarBinary for the binary collation and VarChar
// otherwise, and is always flagged nullable.
func (call *builtinChar) compile(c *compiler) (ctype, error) {
	for _, expr := range call.Arguments {
		ct, err := expr.compile(c)
		if err != nil {
			return ctype{}, err
		}
		skip := c.compileNullCheck1(ct)
		if ct.Type == sqltypes.Uint64 {
			c.asm.Convert_ui(1)
		} else if ct.Type != sqltypes.Int64 {
			c.asm.Convert_xi(1)
		}
		// Int64 needs no conversion: it is already the expected type.
		c.asm.jumpDestination(skip)
	}

	col := collations.TypedCollation{
		Collation:    call.collate,
		Coercibility: collations.CoerceCoercible,
		Repertoire:   collations.RepertoireASCII,
	}
	tt := sqltypes.VarChar
	if call.collate == collations.CollationBinaryID {
		tt = sqltypes.VarBinary
	}
	c.asm.Fn_CHAR(tt, col, len(call.Arguments))
	return ctype{Type: tt, Flag: flagNullable, Col: col}, nil
}

// encodeChar appends the big-endian bytes of i to buf and returns the
// extended slice. Only as many bytes as the value needs are written: one byte
// below 0x100, two below 0x10000, three below 0x1000000 and four otherwise.
// Zero is written as a single 0x00 byte.
func encodeChar(buf []byte, i uint32) []byte {
	// Emit the higher-order bytes only when the value reaches them; the
	// lowest byte is always emitted.
	if i >= 0x1000000 {
		buf = append(buf, byte(i>>24))
	}
	if i >= 0x10000 {
		buf = append(buf, byte(i>>16))
	}
	if i >= 0x100 {
		buf = append(buf, byte(i>>8))
	}
	return append(buf, byte(i))
}
Loading

0 comments on commit 532f767

Please sign in to comment.