Skip to content

Commit

Permalink
Stats safer encode (#8379)
Browse files Browse the repository at this point in the history
* [no-release-notes] tests for varbinary stats

* bump

* bump

* mcv row types fix

* fix bats

* nick's comments

* [statsnoms] safer encode

* fix loadBoundRow

* revert testing edits

* bad merge

* [ga-format-pr] Run go/utils/repofmt/format_repo.sh and go/Godeps/update.sh

* try to fix race

* revert testing change

---------

Co-authored-by: max-hoffman <[email protected]>
  • Loading branch information
max-hoffman and max-hoffman authored Sep 24, 2024
1 parent 2f79023 commit bd95816
Show file tree
Hide file tree
Showing 6 changed files with 93 additions and 29 deletions.
19 changes: 19 additions & 0 deletions go/libraries/doltcore/sqle/enginetest/stats_queries.go
Original file line number Diff line number Diff line change
Expand Up @@ -331,6 +331,22 @@ var DoltStatsIOTests = []queries.ScriptTest{
},
},
},
{
Name: "varbinary encoding bug",
SetUpScript: []string{
`create table a (a varbinary (32) primary key)`,
"insert into a values ('hello, world')",
},
Assertions: []queries.ScriptTestAssertion{
{
Query: `analyze table a`,
},
{
Query: "select count(*) from dolt_statistics",
Expected: []sql.Row{{1}},
},
},
},
{
Name: "boundary nils don't panic when trying to convert to the zero type",
SetUpScript: []string{
Expand Down Expand Up @@ -590,6 +606,9 @@ var StatBranchTests = []queries.ScriptTest{
{
Query: "call dolt_stats_stop()",
},
{
Query: "select sleep(.1)",
},
{
Query: "call dolt_stats_drop()",
},
Expand Down
52 changes: 24 additions & 28 deletions go/libraries/doltcore/sqle/statsnoms/load.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,13 +39,11 @@ import (

func loadStats(ctx *sql.Context, db dsess.SqlDatabase, m prolly.Map) (map[sql.StatQualifier]*statspro.DoltStats, error) {
qualToStats := make(map[sql.StatQualifier]*statspro.DoltStats)

iter, err := NewStatsIter(ctx, m)
if err != nil {
return nil, err
}
currentStat := statspro.NewDoltStats()
var lowerBound sql.Row
for {
row, err := iter.Next(ctx)
if errors.Is(err, io.EOF) {
Expand Down Expand Up @@ -108,15 +106,10 @@ func loadStats(ctx *sql.Context, db dsess.SqlDatabase, m prolly.Map) (map[sql.St
}
}

boundRow, err := iter.ParseRow(boundRowStr)
if err != nil {
return nil, err
}

qual := sql.NewStatQualifier(dbName, tableName, indexName)
if currentStat.Statistic.Qual.String() != qual.String() {
if !currentStat.Statistic.Qual.Empty() {
currentStat.Statistic.LowerBnd, err = loadLowerBound(ctx, currentStat.Statistic.Qual)
currentStat.Statistic.LowerBnd, currentStat.Tb, err = loadLowerBound(ctx, db, currentStat.Statistic.Qual, len(currentStat.Columns()))
if err != nil {
return nil, err
}
Expand All @@ -133,7 +126,10 @@ func loadStats(ctx *sql.Context, db dsess.SqlDatabase, m prolly.Map) (map[sql.St
currentStat = statspro.NewDoltStats()
currentStat.Statistic.Qual = qual
currentStat.Statistic.Cols = columns
currentStat.Statistic.LowerBnd = lowerBound
currentStat.Statistic.LowerBnd, currentStat.Tb, err = loadLowerBound(ctx, db, currentStat.Statistic.Qual, len(currentStat.Columns()))
if err != nil {
return nil, err
}
}

if currentStat.Statistic.Hist == nil {
Expand All @@ -144,6 +140,11 @@ func loadStats(ctx *sql.Context, db dsess.SqlDatabase, m prolly.Map) (map[sql.St
currentStat.Statistic.Qual = qual
}

boundRow, err := DecodeRow(ctx, m.NodeStore(), boundRowStr, currentStat.Tb)
if err != nil {
return nil, err
}

bucket := statspro.DoltBucket{
Chunk: commit,
Created: createdAt,
Expand All @@ -166,7 +167,7 @@ func loadStats(ctx *sql.Context, db dsess.SqlDatabase, m prolly.Map) (map[sql.St
currentStat.Statistic.Created = createdAt
}
}
currentStat.Statistic.LowerBnd, err = loadLowerBound(ctx, currentStat.Statistic.Qual)
currentStat.Statistic.LowerBnd, currentStat.Tb, err = loadLowerBound(ctx, db, currentStat.Statistic.Qual, len(currentStat.Columns()))
if err != nil {
return nil, err
}
Expand All @@ -193,19 +194,14 @@ func parseTypeStrings(typs []string) ([]sql.Type, error) {
return ret, nil
}

func loadLowerBound(ctx *sql.Context, qual sql.StatQualifier) (sql.Row, error) {
dSess := dsess.DSessFromSess(ctx.Session)
roots, ok := dSess.GetRoots(ctx, qual.Db())
func loadLowerBound(ctx *sql.Context, db dsess.SqlDatabase, qual sql.StatQualifier, cols int) (sql.Row, *val.TupleBuilder, error) {
root, err := db.GetRoot(ctx)
table, ok, err := root.GetTable(ctx, doltdb.TableName{Name: qual.Table()})
if !ok {
return nil, nil
}

table, ok, err := roots.Head.GetTable(ctx, doltdb.TableName{Name: qual.Table()})
if !ok {
return nil, nil
return nil, nil, sql.ErrTableNotFound.New(qual.Table())
}
if err != nil {
return nil, err
return nil, nil, err
}

var idx durable.Index
Expand All @@ -215,25 +211,25 @@ func loadLowerBound(ctx *sql.Context, qual sql.StatQualifier) (sql.Row, error) {
idx, err = table.GetIndexRowData(ctx, qual.Index())
}
if err != nil {
return nil, err
return nil, nil, err
}

prollyMap := durable.ProllyMapFromIndex(idx)
keyBuilder := val.NewTupleBuilder(prollyMap.KeyDesc())
keyBuilder := val.NewTupleBuilder(prollyMap.KeyDesc().PrefixDesc(cols))
buffPool := prollyMap.NodeStore().Pool()

if cnt, err := prollyMap.Count(); err != nil {
return nil, err
return nil, nil, err
} else if cnt == 0 {
return nil, nil
return nil, keyBuilder, nil
}
firstIter, err := prollyMap.IterOrdinalRange(ctx, 0, 1)
if err != nil {
return nil, err
return nil, nil, err
}
keyBytes, _, err := firstIter.Next(ctx)
if err != nil {
return nil, err
return nil, nil, err
}
for i := range keyBuilder.Desc.Types {
keyBuilder.PutRaw(i, keyBytes.GetField(i))
Expand All @@ -244,10 +240,10 @@ func loadLowerBound(ctx *sql.Context, qual sql.StatQualifier) (sql.Row, error) {
for i := 0; i < keyBuilder.Desc.Count(); i++ {
firstRow[i], err = tree.GetField(ctx, prollyMap.KeyDesc(), i, firstKey, prollyMap.NodeStore())
if err != nil {
return nil, err
return nil, nil, err
}
}
return firstRow, nil
return firstRow, keyBuilder, nil
}

func loadFuncDeps(ctx *sql.Context, db dsess.SqlDatabase, qual sql.StatQualifier) (*sql.FuncDepSet, sql.ColSet, error) {
Expand Down
32 changes: 31 additions & 1 deletion go/libraries/doltcore/sqle/statsnoms/write.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ import (
"github.com/dolthub/dolt/go/libraries/doltcore/schema"
"github.com/dolthub/dolt/go/libraries/doltcore/sqle/statspro"
"github.com/dolthub/dolt/go/store/prolly"
"github.com/dolthub/dolt/go/store/prolly/tree"
"github.com/dolthub/dolt/go/store/val"
)

Expand Down Expand Up @@ -120,7 +121,11 @@ func putIndexRows(ctx context.Context, statsMap *prolly.MutableMap, dStats *stat
valueBuilder.PutInt64(4, int64(h.NullCount()))
valueBuilder.PutString(5, strings.Join(dStats.Columns(), ","))
valueBuilder.PutString(6, typesStr)
valueBuilder.PutString(7, stats.StringifyKey(h.UpperBound(), dStats.Statistic.Typs))
boundRow, err := EncodeRow(ctx, statsMap.NodeStore(), h.UpperBound(), dStats.Tb)
if err != nil {
return err
}
valueBuilder.PutString(7, string(boundRow))
valueBuilder.PutInt64(8, int64(h.BoundCount()))
valueBuilder.PutDatetime(9, statspro.DoltBucketCreated(h))
for i, r := range h.Mcvs() {
Expand All @@ -139,3 +144,28 @@ func putIndexRows(ctx context.Context, statsMap *prolly.MutableMap, dStats *stat
}
return nil
}

// EncodeRow serializes row r into a tuple using the supplied tuple
// builder, returning the raw tuple bytes. Nil fields are skipped so the
// corresponding tuple positions remain unset (NULL); all other fields are
// written positionally via tree.PutField.
func EncodeRow(ctx context.Context, ns tree.NodeStore, r sql.Row, tb *val.TupleBuilder) ([]byte, error) {
	for i := 0; i < len(r); i++ {
		v := r[i]
		if v == nil {
			// leave NULL fields unset in the tuple
			continue
		}
		err := tree.PutField(ctx, ns, tb, i, v)
		if err != nil {
			return nil, err
		}
	}
	return tb.Build(ns.Pool()), nil
}

// DecodeRow deserializes a tuple-encoded string s (as produced by
// EncodeRow) back into a sql.Row, reading each field positionally with
// the tuple builder's descriptor. Fields that were never set in the
// tuple decode as nil.
func DecodeRow(ctx context.Context, ns tree.NodeStore, s string, tb *val.TupleBuilder) (sql.Row, error) {
	tup := []byte(s)
	r := make(sql.Row, tb.Desc.Count())
	// idiomatic range over indices only (gofmt -s / staticcheck S1005:
	// the original used `for i, _ := range r`)
	for i := range r {
		field, err := tree.GetField(ctx, tb.Desc, i, tup, ns)
		if err != nil {
			return nil, err
		}
		r[i] = field
	}
	return r, nil
}
2 changes: 2 additions & 0 deletions go/libraries/doltcore/sqle/statspro/dolt_stats.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import (
"github.com/dolthub/go-mysql-server/sql/stats"

"github.com/dolthub/dolt/go/store/hash"
"github.com/dolthub/dolt/go/store/val"
)

type DoltStats struct {
Expand All @@ -36,6 +37,7 @@ type DoltStats struct {
// field on disk
Active map[hash.Hash]int
Hist sql.Histogram
Tb *val.TupleBuilder
}

func (s *DoltStats) Clone(_ context.Context) sql.JSONWrapper {
Expand Down
3 changes: 3 additions & 0 deletions go/libraries/doltcore/sqle/statspro/update.go
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,8 @@ func createNewStatsBuckets(ctx *sql.Context, sqlTable sql.Table, dTab *doltdb.Ta

ret[meta.qual].Statistic.Fds = fds
ret[meta.qual].Statistic.Colset = colSet
ret[meta.qual].Tb = val.NewTupleBuilder(prollyMap.KeyDesc().PrefixDesc(len(meta.cols)))

continue
}

Expand All @@ -105,6 +107,7 @@ func createNewStatsBuckets(ctx *sql.Context, sqlTable sql.Table, dTab *doltdb.Ta
ret[meta.qual].Statistic.Cols = meta.cols
ret[meta.qual].Statistic.Typs = types
ret[meta.qual].Statistic.Qual = meta.qual
ret[meta.qual].Tb = val.NewTupleBuilder(prollyMap.KeyDesc().PrefixDesc(len(meta.cols)))

var start, stop uint64
// read leaf rows for each bucket
Expand Down
14 changes: 14 additions & 0 deletions integration-tests/bats/stats.bats
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,20 @@ teardown() {
[ "${lines[1]}" = "0" ]
}

# Regression test for stats bound-row encoding: a varbinary value containing
# a comma (the old textual field delimiter) previously broke the stats
# encode/decode round trip. After analyze, exactly one stats bucket must
# survive a store-and-reload cycle.
@test "stats: encode/decode loop is delimiter safe" {
cd repo2

# NOTE(review): heredoc SQL is runtime input — kept byte-for-byte.
dolt sql <<EOF
create table uv (u varbinary(255) primary key);
insert into uv values ('hello, world');
analyze table uv;
EOF

# One row in dolt_statistics proves the bucket decoded cleanly.
run dolt sql -r csv -q "select count(*) from dolt_statistics"
[ "$status" -eq 0 ]
[ "${lines[1]}" = "1" ]
}

@test "stats: correct stats directory location, issue#8324" {
cd repo2

Expand Down

0 comments on commit bd95816

Please sign in to comment.