Skip to content

Commit

Permalink
mysql/datetime: Improve TIME parsing logic (vitessio#15135)
Browse files Browse the repository at this point in the history
Signed-off-by: Dirkjan Bussink <[email protected]>
  • Loading branch information
dbussink authored and frouioui committed Feb 6, 2024
1 parent f2a117f commit 2053627
Show file tree
Hide file tree
Showing 10 changed files with 150 additions and 99 deletions.
9 changes: 6 additions & 3 deletions go/mysql/datetime/datetime.go
Original file line number Diff line number Diff line change
Expand Up @@ -368,9 +368,12 @@ func (d Date) YearWeek(mode int) int {
case 1, 3:
year, week := d.ISOWeek()
return year*100 + week
case 4, 5, 6, 7:
// TODO
return 0
case 4, 6:
year, week := d.Sunday4DayWeek()
return year*100 + week
case 5, 7:
year, week := d.MondayWeek()
return year*100 + week
default:
return d.YearWeek(DefaultWeekMode)
}
Expand Down
101 changes: 68 additions & 33 deletions go/mysql/datetime/parse.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,57 +24,64 @@ import (
"vitess.io/vitess/go/mysql/fastparse"
)

func parsetimeHours(tp *timeparts, in string) (out string, ok bool) {
func parsetimeHours(tp *timeparts, in string) (string, TimeState) {
var ok bool
if tp.hour, in, ok = getnumn(in); ok {
tp.day = tp.day + tp.hour/24
tp.hour = tp.hour % 24

switch {
case len(in) == 0:
return "", true
return "", TimeOK
case in[0] == ':':
return parsetimeMinutes(tp, in[1:])
}
}
return "", false
return "", TimePartial
}

func parsetimeMinutes(tp *timeparts, in string) (out string, ok bool) {
func parsetimeMinutes(tp *timeparts, in string) (string, TimeState) {
var ok bool
if tp.min, in, ok = getnum(in, false); ok {
switch {
case tp.min > 59:
return "", false
return "", TimeInvalid
case len(in) == 0:
return "", true
return "", TimeOK
case in[0] == ':':
return parsetimeSeconds(tp, in[1:])
}
}
return "", false
return "", TimePartial
}

func parsetimeSeconds(tp *timeparts, in string) (out string, ok bool) {
func parsetimeSeconds(tp *timeparts, in string) (string, TimeState) {
var ok bool
if tp.sec, in, ok = getnum(in, false); ok {
switch {
case tp.sec > 59:
return "", false
return "", TimeInvalid
case len(in) == 0:
return "", true
return "", TimeOK
case len(in) > 1 && in[0] == '.':
n := 1
for ; n < len(in) && isDigit(in, n); n++ {
}
var l int
tp.nsec, l, ok = parseNanoseconds(in, n)
tp.prec = uint8(l)
return "", ok && len(in) == n
if ok && len(in) == n {
return "", TimeOK
}
return "", TimePartial
}
}
return "", false
return "", TimePartial
}

func parsetimeAny(tp *timeparts, in string) (out string, ok bool) {
func parsetimeAny(tp *timeparts, in string) (out string, state TimeState) {
orig := in
var ok bool
for i := 0; i < len(in); i++ {
switch r := in[i]; {
case isSpace(r):
Expand All @@ -91,7 +98,7 @@ func parsetimeAny(tp *timeparts, in string) (out string, ok bool) {
return parsetimeNoDelimiters(tp, orig)
}
if tp.day > 34 {
return "", clampTimeparts(tp)
return "", clampTimeparts(tp, state)
}
return parsetimeHours(tp, in)
case r == ':':
Expand All @@ -101,8 +108,9 @@ func parsetimeAny(tp *timeparts, in string) (out string, ok bool) {
return parsetimeNoDelimiters(tp, in)
}

func parsetimeNoDelimiters(tp *timeparts, in string) (out string, ok bool) {
func parsetimeNoDelimiters(tp *timeparts, in string) (out string, state TimeState) {
var integral int
var ok bool
for ; integral < len(in); integral++ {
if in[integral] == '.' || !isDigit(in, integral) {
break
Expand All @@ -112,12 +120,9 @@ func parsetimeNoDelimiters(tp *timeparts, in string) (out string, ok bool) {
switch integral {
default:
// MySQL limits this to a numeric value that fits in a 32-bit unsigned integer.
i, _ := fastparse.ParseInt64(in[:integral], 10)
i, _ := fastparse.ParseUint64(in[:integral], 10)
if i > math.MaxUint32 {
return "", false
}
if i < -math.MaxUint32 {
return "", false
return "", TimeInvalid
}

tp.hour, in, ok = getnuml(in, integral-4)
Expand All @@ -132,18 +137,18 @@ func parsetimeNoDelimiters(tp *timeparts, in string) (out string, ok bool) {
case 3, 4:
tp.min, in, ok = getnuml(in, integral-2)
if !ok || tp.min > 59 {
return "", false
return "", TimeInvalid
}
integral = 2
fallthrough

case 1, 2:
tp.sec, in, ok = getnuml(in, integral)
if !ok || tp.sec > 59 {
return "", false
return "", TimeInvalid
}
case 0:
return "", false
return "", TimeInvalid
}

if len(in) > 1 && in[0] == '.' && isDigit(in, 1) {
Expand All @@ -152,14 +157,18 @@ func parsetimeNoDelimiters(tp *timeparts, in string) (out string, ok bool) {
}
var l int
tp.nsec, l, ok = parseNanoseconds(in, n)
if !ok {
state = TimeInvalid
}
tp.prec = uint8(l)
in = in[n:]
}

return in, clampTimeparts(tp) && ok
state = clampTimeparts(tp, state)
return in, state
}

func clampTimeparts(tp *timeparts) bool {
func clampTimeparts(tp *timeparts, state TimeState) TimeState {
// Maximum time is 838:59:59, so we have to clamp
// it to that value here if we otherwise successfully
// parser the time.
Expand All @@ -168,15 +177,31 @@ func clampTimeparts(tp *timeparts) bool {
tp.hour = 22
tp.min = 59
tp.sec = 59
return false
if state == TimeOK {
return TimePartial
}
}
return true
return state
}

func ParseTime(in string, prec int) (t Time, l int, ok bool) {
type TimeState uint8

const (
// TimeOK indicates that the parsed value is valid and complete.
TimeOK TimeState = iota
// TimePartial indicates that the parsed value has a partially parsed value
// but it is not fully complete and valid. There could be additional stray
// data in the input, or it has an overflow.
TimePartial
// TimeInvalid indicates that the parsed value is invalid and no partial
// TIME value could be extracted from the input.
TimeInvalid
)

func ParseTime(in string, prec int) (t Time, l int, state TimeState) {
in = strings.Trim(in, " \t\r\n")
if len(in) == 0 {
return Time{}, 0, false
return Time{}, 0, TimeInvalid
}
var neg bool
if in[0] == '-' {
Expand All @@ -185,11 +210,15 @@ func ParseTime(in string, prec int) (t Time, l int, ok bool) {
}

var tp timeparts
in, ok = parsetimeAny(&tp, in)
ok = clampTimeparts(&tp) && ok
in, state = parsetimeAny(&tp, in)
if state == TimeInvalid {
return Time{}, 0, state
}

state = clampTimeparts(&tp, state)

hours := uint16(24*tp.day + tp.hour)
if !tp.isZero() && neg {
if neg {
hours |= negMask
}

Expand All @@ -206,7 +235,13 @@ func ParseTime(in string, prec int) (t Time, l int, ok bool) {
t = t.Round(prec)
}

return t, prec, ok && len(in) == 0
switch {
case state == TimeOK && len(in) == 0:
state = TimeOK
case state == TimeOK && len(in) > 0:
state = TimePartial
}
return t, prec, state
}

func ParseDate(s string) (Date, bool) {
Expand Down
88 changes: 39 additions & 49 deletions go/mysql/datetime/parse_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -96,32 +96,33 @@ func TestParseTime(t *testing.T) {
output testTime
norm string
l int
err bool
state TimeState
}{
{input: "00:00:00", norm: "00:00:00.000000", output: testTime{}},
{input: "00:00:00foo", norm: "00:00:00.000000", output: testTime{}, err: true},
{input: "-00:00:00", norm: "-00:00:00.000000", output: testTime{negative: true}},
{input: "00:00:00foo", norm: "00:00:00.000000", output: testTime{}, state: TimePartial},
{input: "11:12:13", norm: "11:12:13.000000", output: testTime{11, 12, 13, 0, false}},
{input: "11:12:13foo", norm: "11:12:13.000000", output: testTime{11, 12, 13, 0, false}, err: true},
{input: "11:12:13foo", norm: "11:12:13.000000", output: testTime{11, 12, 13, 0, false}, state: TimePartial},
{input: "11:12:13.1", norm: "11:12:13.100000", output: testTime{11, 12, 13, 100000000, false}, l: 1},
{input: "11:12:13.foo", norm: "11:12:13.000000", output: testTime{11, 12, 13, 0, false}, err: true},
{input: "11:12:13.1foo", norm: "11:12:13.100000", output: testTime{11, 12, 13, 100000000, false}, l: 1, err: true},
{input: "11:12:13.foo", norm: "11:12:13.000000", output: testTime{11, 12, 13, 0, false}, state: TimePartial},
{input: "11:12:13.1foo", norm: "11:12:13.100000", output: testTime{11, 12, 13, 100000000, false}, l: 1, state: TimePartial},
{input: "11:12:13.123456", norm: "11:12:13.123456", output: testTime{11, 12, 13, 123456000, false}, l: 6},
{input: "11:12:13.000001", norm: "11:12:13.000001", output: testTime{11, 12, 13, 1000, false}, l: 6},
{input: "11:12:13.000000", norm: "11:12:13.000000", output: testTime{11, 12, 13, 0, false}, l: 6},
{input: "11:12:13.123456foo", norm: "11:12:13.123456", output: testTime{11, 12, 13, 123456000, false}, l: 6, err: true},
{input: "11:12:13.123456foo", norm: "11:12:13.123456", output: testTime{11, 12, 13, 123456000, false}, l: 6, state: TimePartial},
{input: "3 11:12:13", norm: "83:12:13.000000", output: testTime{3*24 + 11, 12, 13, 0, false}},
{input: "3 11:12:13foo", norm: "83:12:13.000000", output: testTime{3*24 + 11, 12, 13, 0, false}, err: true},
{input: "3 11:12:13foo", norm: "83:12:13.000000", output: testTime{3*24 + 11, 12, 13, 0, false}, state: TimePartial},
{input: "3 41:12:13", norm: "113:12:13.000000", output: testTime{3*24 + 41, 12, 13, 0, false}},
{input: "3 41:12:13foo", norm: "113:12:13.000000", output: testTime{3*24 + 41, 12, 13, 0, false}, err: true},
{input: "34 23:12:13", norm: "838:59:59.000000", output: testTime{838, 59, 59, 0, false}, err: true},
{input: "35 11:12:13", norm: "838:59:59.000000", output: testTime{838, 59, 59, 0, false}, err: true},
{input: "3 41:12:13foo", norm: "113:12:13.000000", output: testTime{3*24 + 41, 12, 13, 0, false}, state: TimePartial},
{input: "34 23:12:13", norm: "838:59:59.000000", output: testTime{838, 59, 59, 0, false}, state: TimePartial},
{input: "35 11:12:13", norm: "838:59:59.000000", output: testTime{838, 59, 59, 0, false}, state: TimePartial},
{input: "11:12", norm: "11:12:00.000000", output: testTime{11, 12, 0, 0, false}},
{input: "5 11:12", norm: "131:12:00.000000", output: testTime{5*24 + 11, 12, 0, 0, false}},
{input: "-2 11:12", norm: "-59:12:00.000000", output: testTime{2*24 + 11, 12, 0, 0, true}},
{input: "--2 11:12", norm: "00:00:00.000000", err: true},
{input: "nonsense", norm: "00:00:00.000000", err: true},
{input: "--2 11:12", norm: "00:00:00.000000", state: TimeInvalid},
{input: "nonsense", norm: "00:00:00.000000", state: TimeInvalid},
{input: "2 11", norm: "59:00:00.000000", output: testTime{2*24 + 11, 0, 0, 0, false}},
{input: "2 -11", norm: "00:00:02.000000", output: testTime{0, 0, 2, 0, false}, err: true},
{input: "2 -11", norm: "00:00:02.000000", output: testTime{0, 0, 2, 0, false}, state: TimePartial},
{input: "13", norm: "00:00:13.000000", output: testTime{0, 0, 13, 0, false}},
{input: "111213", norm: "11:12:13.000000", output: testTime{11, 12, 13, 0, false}},
{input: "111213.123456", norm: "11:12:13.123456", output: testTime{11, 12, 13, 123456000, false}, l: 6},
Expand All @@ -130,19 +131,21 @@ func TestParseTime(t *testing.T) {
{input: "25:12:13", norm: "25:12:13.000000", output: testTime{25, 12, 13, 0, false}},
{input: "32:35", norm: "32:35:00.000000", output: testTime{32, 35, 0, 0, false}},
{input: "101:34:58", norm: "101:34:58.000000", output: testTime{101, 34, 58, 0, false}},
{input: "101:64:58", norm: "00:00:00.000000", state: TimeInvalid},
{input: "101:34:68", norm: "00:00:00.000000", state: TimeInvalid},
{input: "1", norm: "00:00:01.000000", output: testTime{0, 0, 1, 0, false}},
{input: "11", norm: "00:00:11.000000", output: testTime{0, 0, 11, 0, false}},
{input: "111", norm: "00:01:11.000000", output: testTime{0, 1, 11, 0, false}},
{input: "1111", norm: "00:11:11.000000", output: testTime{0, 11, 11, 0, false}},
{input: "11111", norm: "01:11:11.000000", output: testTime{1, 11, 11, 0, false}},
{input: "111111", norm: "11:11:11.000000", output: testTime{11, 11, 11, 0, false}},
{input: "1foo", norm: "00:00:01.000000", output: testTime{0, 0, 1, 0, false}, err: true},
{input: "11foo", norm: "00:00:11.000000", output: testTime{0, 0, 11, 0, false}, err: true},
{input: "111foo", norm: "00:01:11.000000", output: testTime{0, 1, 11, 0, false}, err: true},
{input: "1111foo", norm: "00:11:11.000000", output: testTime{0, 11, 11, 0, false}, err: true},
{input: "11111foo", norm: "01:11:11.000000", output: testTime{1, 11, 11, 0, false}, err: true},
{input: "111111foo", norm: "11:11:11.000000", output: testTime{11, 11, 11, 0, false}, err: true},
{input: "1111111foo", norm: "111:11:11.000000", output: testTime{111, 11, 11, 0, false}, err: true},
{input: "1foo", norm: "00:00:01.000000", output: testTime{0, 0, 1, 0, false}, state: TimePartial},
{input: "11foo", norm: "00:00:11.000000", output: testTime{0, 0, 11, 0, false}, state: TimePartial},
{input: "111foo", norm: "00:01:11.000000", output: testTime{0, 1, 11, 0, false}, state: TimePartial},
{input: "1111foo", norm: "00:11:11.000000", output: testTime{0, 11, 11, 0, false}, state: TimePartial},
{input: "11111foo", norm: "01:11:11.000000", output: testTime{1, 11, 11, 0, false}, state: TimePartial},
{input: "111111foo", norm: "11:11:11.000000", output: testTime{11, 11, 11, 0, false}, state: TimePartial},
{input: "1111111foo", norm: "111:11:11.000000", output: testTime{111, 11, 11, 0, false}, state: TimePartial},
{input: "-1", norm: "-00:00:01.000000", output: testTime{0, 0, 1, 0, true}},
{input: "-11", norm: "-00:00:11.000000", output: testTime{0, 0, 11, 0, true}},
{input: "-111", norm: "-00:01:11.000000", output: testTime{0, 1, 11, 0, true}},
Expand Down Expand Up @@ -172,44 +175,31 @@ func TestParseTime(t *testing.T) {
{input: "11111.1", norm: "01:11:11.100000", output: testTime{1, 11, 11, 100000000, false}, l: 1},
{input: "111111.1", norm: "11:11:11.100000", output: testTime{11, 11, 11, 100000000, false}, l: 1},
{input: "1111111.1", norm: "111:11:11.100000", output: testTime{111, 11, 11, 100000000, false}, l: 1},
{input: "20000101", norm: "838:59:59.000000", output: testTime{838, 59, 59, 0, false}, err: true},
{input: "-20000101", norm: "-838:59:59.000000", output: testTime{838, 59, 59, 0, true}, err: true},
{input: "999995959", norm: "838:59:59.000000", output: testTime{838, 59, 59, 0, false}, err: true},
{input: "-999995959", norm: "-838:59:59.000000", output: testTime{838, 59, 59, 0, true}, err: true},
{input: "4294965959", norm: "838:59:59.000000", output: testTime{838, 59, 59, 0, false}, err: true},
{input: "-4294965959", norm: "-838:59:59.000000", output: testTime{838, 59, 59, 0, true}, err: true},
{input: "4294975959", norm: "00:00:00.000000", err: true},
{input: "-4294975959", norm: "00:00:00.000000", err: true},
{input: "\t34 foo\t", norm: "00:00:34.000000", output: testTime{0, 0, 34, 0, false}, err: true},
{input: "\t34 1foo\t", norm: "817:00:00.000000", output: testTime{817, 0, 0, 0, false}, err: true},
{input: "\t34 23foo\t", norm: "838:59:59.000000", output: testTime{838, 59, 59, 0, false}, err: true},
{input: "\t35 foo\t", norm: "00:00:35.000000", output: testTime{0, 0, 35, 0, false}, err: true},
{input: "\t35 1foo\t", norm: "838:59:59.000000", output: testTime{838, 59, 59, 0, false}, err: true},
{input: " 255 foo", norm: "00:02:55.000000", output: testTime{0, 2, 55, 0, false}, err: true},
{input: "255", norm: "00:02:55.000000", output: testTime{0, 2, 55, 0, false}},
{input: "20000101", norm: "838:59:59.000000", output: testTime{838, 59, 59, 0, false}, state: TimePartial},
{input: "-20000101", norm: "-838:59:59.000000", output: testTime{838, 59, 59, 0, true}, state: TimePartial},
{input: "999995959", norm: "838:59:59.000000", output: testTime{838, 59, 59, 0, false}, state: TimePartial},
{input: "-999995959", norm: "-838:59:59.000000", output: testTime{838, 59, 59, 0, true}, state: TimePartial},
{input: "4294965959", norm: "838:59:59.000000", output: testTime{838, 59, 59, 0, false}, state: TimePartial},
{input: "-4294965959", norm: "-838:59:59.000000", output: testTime{838, 59, 59, 0, true}, state: TimePartial},
{input: "4294975959", norm: "00:00:00.000000", state: TimeInvalid},
{input: "-4294975959", norm: "00:00:00.000000", state: TimeInvalid},
{input: "\t34 foo\t", norm: "00:00:34.000000", output: testTime{0, 0, 34, 0, false}, state: TimePartial},
{input: "\t34 1foo\t", norm: "817:00:00.000000", output: testTime{817, 0, 0, 0, false}, state: TimePartial},
{input: "\t34 23foo\t", norm: "838:59:59.000000", output: testTime{838, 59, 59, 0, false}, state: TimePartial},
{input: "\t35 foo\t", norm: "00:00:35.000000", output: testTime{0, 0, 35, 0, false}, state: TimePartial},
{input: "\t35 1foo\t", norm: "838:59:59.000000", output: testTime{838, 59, 59, 0, false}, state: TimePartial},
}

for _, test := range tests {
t.Run(test.input, func(t *testing.T) {
got, l, ok := ParseTime(test.input, -1)
if test.err {
assert.Equal(t, test.output.hour, got.Hour())
assert.Equal(t, test.output.minute, got.Minute())
assert.Equal(t, test.output.second, got.Second())
assert.Equal(t, test.output.nanosecond, got.Nanosecond())
assert.Equal(t, test.norm, string(got.AppendFormat(nil, 6)))
assert.Equal(t, test.l, l)
assert.Falsef(t, ok, "did not fail to parse %s", test.input)
return
}

require.True(t, ok)
got, l, state := ParseTime(test.input, -1)
assert.Equal(t, test.state, state)
assert.Equal(t, test.output.hour, got.Hour())
assert.Equal(t, test.output.minute, got.Minute())
assert.Equal(t, test.output.second, got.Second())
assert.Equal(t, test.output.nanosecond, got.Nanosecond())
assert.Equal(t, test.l, l)
assert.Equal(t, test.norm, string(got.AppendFormat(nil, 6)))
assert.Equal(t, test.l, l)
})
}
}
Expand Down
4 changes: 2 additions & 2 deletions go/mysql/json/parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -941,8 +941,8 @@ func (v *Value) Time() (datetime.Time, bool) {
if v.t != TypeTime {
return datetime.Time{}, false
}
t, _, ok := datetime.ParseTime(v.s, datetime.DefaultPrecision)
return t, ok
t, _, state := datetime.ParseTime(v.s, datetime.DefaultPrecision)
return t, state == datetime.TimeOK
}

// Object returns the underlying JSON object for the v.
Expand Down
Loading

0 comments on commit 2053627

Please sign in to comment.