From 24b49d0ec7068a5ecd053ace36a2b13bb46dbb81 Mon Sep 17 00:00:00 2001 From: Daylon Wilkins Date: Tue, 27 Aug 2024 03:14:04 -0700 Subject: [PATCH] Add support for Doltgres indexes --- go/go.mod | 2 +- go/go.sum | 4 +- go/go.work.sum | 4 +- .../sqle/dtables/commit_diff_table.go | 15 +- .../doltcore/sqle/index/dolt_index.go | 54 ++-- .../doltcore/sqle/index/dolt_index_test.go | 16 +- .../doltcore/sqle/index/doltgres_iter.go | 300 ++++++++++++++++++ .../doltcore/sqle/index/index_reader.go | 30 +- .../doltcore/sqle/index/prolly_index_iter.go | 16 +- go/libraries/doltcore/sqle/index/testutils.go | 6 +- .../doltcore/sqle/procedures_table.go | 6 +- 11 files changed, 394 insertions(+), 59 deletions(-) create mode 100644 go/libraries/doltcore/sqle/index/doltgres_iter.go diff --git a/go/go.mod b/go/go.mod index 5747b47ebb..fcd7bde1bf 100644 --- a/go/go.mod +++ b/go/go.mod @@ -57,7 +57,7 @@ require ( github.com/cespare/xxhash/v2 v2.2.0 github.com/creasty/defaults v1.6.0 github.com/dolthub/flatbuffers/v23 v23.3.3-dh.2 - github.com/dolthub/go-mysql-server v0.18.2-0.20240826213655-024a764d305f + github.com/dolthub/go-mysql-server v0.18.2-0.20240827100900-3bf086dd5c18 github.com/dolthub/gozstd v0.0.0-20240423170813-23a2903bca63 github.com/dolthub/swiss v0.1.0 github.com/goccy/go-json v0.10.2 diff --git a/go/go.sum b/go/go.sum index 8ebba0a111..46170bd3ab 100644 --- a/go/go.sum +++ b/go/go.sum @@ -183,8 +183,8 @@ github.com/dolthub/fslock v0.0.3 h1:iLMpUIvJKMKm92+N1fmHVdxJP5NdyDK5bK7z7Ba2s2U= github.com/dolthub/fslock v0.0.3/go.mod h1:QWql+P17oAAMLnL4HGB5tiovtDuAjdDTPbuqx7bYfa0= github.com/dolthub/go-icu-regex v0.0.0-20230524105445-af7e7991c97e h1:kPsT4a47cw1+y/N5SSCkma7FhAPw7KeGmD6c9PBZW9Y= github.com/dolthub/go-icu-regex v0.0.0-20230524105445-af7e7991c97e/go.mod h1:KPUcpx070QOfJK1gNe0zx4pA5sicIK1GMikIGLKC168= -github.com/dolthub/go-mysql-server v0.18.2-0.20240826213655-024a764d305f h1:veiMzylffumQ1XX4vYuyk/iXV06o7CgDTY+YrQxtfNY= -github.com/dolthub/go-mysql-server v0.18.2-0.20240826213655-024a764d305f/go.mod h1:nbdOzd0ceWONE80vbfwoRBjut7z3CIj69ZgDF/cKuaA= +github.com/dolthub/go-mysql-server v0.18.2-0.20240827100900-3bf086dd5c18 h1:1lgwZvnecrjoc9v0iqxjdKBvaasAPiQzty40uTKOHsE= +github.com/dolthub/go-mysql-server v0.18.2-0.20240827100900-3bf086dd5c18/go.mod h1:nbdOzd0ceWONE80vbfwoRBjut7z3CIj69ZgDF/cKuaA= github.com/dolthub/gozstd v0.0.0-20240423170813-23a2903bca63 h1:OAsXLAPL4du6tfbBgK0xXHZkOlos63RdKYS3Sgw/dfI= github.com/dolthub/gozstd v0.0.0-20240423170813-23a2903bca63/go.mod h1:lV7lUeuDhH5thVGDCKXbatwKy2KW80L4rMT46n+Y2/Q= github.com/dolthub/ishell v0.0.0-20240701202509-2b217167d718 h1:lT7hE5k+0nkBdj/1UOSFwjWpNxf+LCApbRHgnCA17XE= diff --git a/go/go.work.sum b/go/go.work.sum index 37a6dc28ca..743236f58e 100644 --- a/go/go.work.sum +++ b/go/go.work.sum @@ -320,8 +320,8 @@ github.com/creack/pty v1.1.9 h1:uDmaGzcdjhF4i/plgjmEsriH11Y0o7RKapEf/LDaM3w= github.com/dgrijalva/jwt-go v3.2.0+incompatible h1:7qlOGliEKZXTDg6OTjfoBKDXWrumCAMpl/TFQ4/5kLM= github.com/dolthub/go-mysql-server v0.18.2-0.20240812011431-f3892cc42bbf h1:F4OT8cjaQzGlLne9vp7/q0i5QFsQE2OUWIaL5thO5qA= github.com/dolthub/go-mysql-server v0.18.2-0.20240812011431-f3892cc42bbf/go.mod h1:PwuemL+YK+YiWcUFhknixeqNLjJNfCx7KDsHNajx9fM= -github.com/dolthub/vitess v0.0.0-20240807181005-71d735078e24 h1:/zCd98CLZURqK85jQ+qRmEMx/dpXz85F1/Et7gqMGkk= -github.com/dolthub/vitess v0.0.0-20240807181005-71d735078e24/go.mod h1:uBvlRluuL+SbEWTCZ68o0xvsdYZER3CEG/35INdzfJM= +github.com/dolthub/go-mysql-server v0.18.2-0.20240827100900-3bf086dd5c18 
h1:1lgwZvnecrjoc9v0iqxjdKBvaasAPiQzty40uTKOHsE= +github.com/dolthub/go-mysql-server v0.18.2-0.20240827100900-3bf086dd5c18/go.mod h1:nbdOzd0ceWONE80vbfwoRBjut7z3CIj69ZgDF/cKuaA= github.com/eapache/go-resiliency v1.1.0 h1:1NtRmCAqadE2FN4ZcN6g90TP3uk8cg9rn9eNK2197aU= github.com/eapache/go-xerial-snappy v0.0.0-20180814174437-776d5712da21 h1:YEetp8/yCZMuEPMUDHG0CW/brkkEp8mzqk2+ODEitlw= github.com/eapache/queue v1.1.0 h1:YOEu7KNc61ntiQlcEeUIoDTJ2o8mQznoNvUhiigpIqc= diff --git a/go/libraries/doltcore/sqle/dtables/commit_diff_table.go b/go/libraries/doltcore/sqle/dtables/commit_diff_table.go index 4466ebecd3..7406ad64fa 100644 --- a/go/libraries/doltcore/sqle/dtables/commit_diff_table.go +++ b/go/libraries/doltcore/sqle/dtables/commit_diff_table.go @@ -151,11 +151,15 @@ func (dt *CommitDiffTable) Partitions(ctx *sql.Context) (sql.PartitionIter, erro } func (dt *CommitDiffTable) LookupPartitions(ctx *sql.Context, i sql.IndexLookup) (sql.PartitionIter, error) { - if len(i.Ranges) != 1 || len(i.Ranges[0]) != 2 { + ranges, ok := i.Ranges.(sql.MySQLRangeCollection) + if !ok { + return nil, fmt.Errorf("commit diff table requires MySQL ranges") + } + if len(ranges) != 1 || len(ranges[0]) != 2 { return nil, ErrInvalidCommitDiffTableArgs } - to := i.Ranges[0][0] - from := i.Ranges[0][1] + to := ranges[0][0] + from := ranges[0][1] switch to.UpperBound.(type) { case sql.Above, sql.Below: default: @@ -166,16 +170,15 @@ func (dt *CommitDiffTable) LookupPartitions(ctx *sql.Context, i sql.IndexLookup) default: return nil, ErrInvalidCommitDiffTableArgs } - toCommit, _, err := to.Typ.Convert(sql.GetRangeCutKey(to.UpperBound)) + toCommit, _, err := to.Typ.Convert(sql.GetMySQLRangeCutKey(to.UpperBound)) if err != nil { return nil, err } - var ok bool dt.toCommit, ok = toCommit.(string) if !ok { return nil, fmt.Errorf("to_commit must be string, found %T", toCommit) } - fromCommit, _, err := from.Typ.Convert(sql.GetRangeCutKey(from.UpperBound)) + fromCommit, _, err := from.Typ.Convert(sql.GetMySQLRangeCutKey(from.UpperBound)) if err != nil { return nil, err } diff --git a/go/libraries/doltcore/sqle/index/dolt_index.go b/go/libraries/doltcore/sqle/index/dolt_index.go index 4e82cb3a68..5b3d95fcaa 100644 --- a/go/libraries/doltcore/sqle/index/dolt_index.go +++ b/go/libraries/doltcore/sqle/index/dolt_index.go @@ -78,10 +78,14 @@ type CommitIndex struct { func (p *CommitIndex) CanSupport(ranges ...sql.Range) bool { var selects []string for _, r := range ranges { - if len(r) != 1 { + mysqlRange, ok := r.(sql.MySQLRange) + if !ok { return false } - lb, ok := r[0].LowerBound.(sql.Below) + if len(mysqlRange) != 1 { + return false + } + lb, ok := mysqlRange[0].LowerBound.(sql.Below) if !ok { return false } @@ -89,7 +93,7 @@ func (p *CommitIndex) CanSupport(ranges ...sql.Range) bool { if !ok { return false } - ub, ok := r[0].UpperBound.(sql.Above) + ub, ok := mysqlRange[0].UpperBound.(sql.Above) if !ok { return false } @@ -685,7 +689,7 @@ func (di *doltIndex) getDurableState(ctx *sql.Context, ti DoltTableable) (*durab return ret, nil } -func (di *doltIndex) prollyRanges(ctx *sql.Context, ns tree.NodeStore, ranges ...sql.Range) ([]prolly.Range, error) { +func (di *doltIndex) prollyRanges(ctx *sql.Context, ns tree.NodeStore, ranges ...sql.MySQLRange) ([]prolly.Range, error) { //todo(max): it is important that *doltIndexLookup maintains a reference // to empty sqlRanges, otherwise the analyzer will dismiss the index and // chose a less optimal lookup index. 
This is a GMS concern, so GMS should @@ -704,12 +708,12 @@ func (di *doltIndex) prollyRanges(ctx *sql.Context, ns tree.NodeStore, ranges .. return pranges, nil } -func (di *doltIndex) nomsRanges(ctx *sql.Context, iranges ...sql.Range) ([]*noms.ReadRange, error) { +func (di *doltIndex) nomsRanges(ctx *sql.Context, iranges ...sql.MySQLRange) ([]*noms.ReadRange, error) { // This might remain nil if the given nomsRanges each contain an EmptyRange for one of the columns. This will just // cause the lookup to return no rows, which is the desired behavior. var readRanges []*noms.ReadRange - ranges := make([]sql.Range, len(iranges)) + ranges := make([]sql.MySQLRange, len(iranges)) for i := range iranges { ranges[i] = DropTrailingAllColumnExprs(iranges[i]) @@ -729,7 +733,7 @@ RangeLoop: var lowerKeys []interface{} for _, rangeColumnExpr := range rang { if rangeColumnExpr.HasLowerBound() { - lowerKeys = append(lowerKeys, sql.GetRangeCutKey(rangeColumnExpr.LowerBound)) + lowerKeys = append(lowerKeys, sql.GetMySQLRangeCutKey(rangeColumnExpr.LowerBound)) } else { break } @@ -753,7 +757,7 @@ RangeLoop: // We promote each type as the value has already been validated against the type promotedType := di.columns[i].TypeInfo.Promote() if rangeColumnExpr.HasLowerBound() { - key := sql.GetRangeCutKey(rangeColumnExpr.LowerBound) + key := sql.GetMySQLRangeCutKey(rangeColumnExpr.LowerBound) val, err := promotedType.ConvertValueToNomsValue(ctx, di.vrw, key) if err != nil { return nil, err @@ -770,7 +774,7 @@ RangeLoop: cb.boundsCase = boundsCase_infinity_infinity } if rangeColumnExpr.HasUpperBound() { - key := sql.GetRangeCutKey(rangeColumnExpr.UpperBound) + key := sql.GetMySQLRangeCutKey(rangeColumnExpr.UpperBound) val, err := promotedType.ConvertValueToNomsValue(ctx, di.vrw, key) if err != nil { return nil, err @@ -1082,8 +1086,8 @@ func maybeGetKeyBuilder(idx durable.Index) *val.TupleBuilder { return nil } -func pruneEmptyRanges(sqlRanges []sql.Range) (pruned []sql.Range, err error) { - pruned = make([]sql.Range, 0, len(sqlRanges)) +func pruneEmptyRanges(sqlRanges []sql.MySQLRange) (pruned []sql.MySQLRange, err error) { + pruned = make([]sql.MySQLRange, 0, len(sqlRanges)) for _, sr := range sqlRanges { empty := false for _, colExpr := range sr { @@ -1137,10 +1141,10 @@ func (di *doltIndex) valueReadWriter() types.ValueReadWriter { return di.vrw } -func (di *doltIndex) prollySpatialRanges(ranges []sql.Range) ([]prolly.Range, error) { +func (di *doltIndex) prollySpatialRanges(ranges []sql.MySQLRange) ([]prolly.Range, error) { // should be exactly one range rng := ranges[0][0] - lower, upper := sql.GetRangeCutKey(rng.LowerBound), sql.GetRangeCutKey(rng.UpperBound) + lower, upper := sql.GetMySQLRangeCutKey(rng.LowerBound), sql.GetMySQLRangeCutKey(rng.UpperBound) minPoint, ok := lower.(sqltypes.Point) if !ok { @@ -1190,7 +1194,7 @@ func (di *doltIndex) prollySpatialRanges(ranges []sql.Range) ([]prolly.Range, er return pRanges, nil } -func (di *doltIndex) prollyRangesFromSqlRanges(ctx context.Context, ns tree.NodeStore, ranges []sql.Range, tb *val.TupleBuilder) ([]prolly.Range, error) { +func (di *doltIndex) prollyRangesFromSqlRanges(ctx context.Context, ns tree.NodeStore, ranges []sql.MySQLRange, tb *val.TupleBuilder) ([]prolly.Range, error) { var err error if !di.spatial { ranges, err = pruneEmptyRanges(ranges) @@ -1309,7 +1313,7 @@ func (di *doltIndex) prollyRangesFromSqlRanges(ctx context.Context, ns tree.Node return pranges, nil } -func rangeCutIsBinding(c sql.RangeCut) bool { +func rangeCutIsBinding(c 
sql.MySQLRangeCut) bool { switch c.(type) { case sql.Below, sql.Above, sql.AboveNull: return true @@ -1320,11 +1324,11 @@ func rangeCutIsBinding(c sql.RangeCut) bool { } } -func getRangeCutValue(cut sql.RangeCut, typ sql.Type) (interface{}, error) { +func getRangeCutValue(cut sql.MySQLRangeCut, typ sql.Type) (interface{}, error) { if _, ok := cut.(sql.AboveNull); ok { return nil, nil } - ret, oob, err := typ.Convert(sql.GetRangeCutKey(cut)) + ret, oob, err := typ.Convert(sql.GetMySQLRangeCutKey(cut)) if oob == sql.OutOfRange { return ret, nil } @@ -1335,7 +1339,7 @@ func getRangeCutValue(cut sql.RangeCut, typ sql.Type) (interface{}, error) { // // Sometimes when we construct read ranges against laid out index structures, // we want to ignore these trailing clauses. -func DropTrailingAllColumnExprs(r sql.Range) sql.Range { +func DropTrailingAllColumnExprs(r sql.MySQLRange) sql.MySQLRange { i := len(r) for i > 0 { if r[i-1].Type() != sql.RangeType_All { @@ -1352,8 +1356,8 @@ func DropTrailingAllColumnExprs(r sql.Range) sql.Range { // // This is for building physical scans against storage which does not store // NULL contiguous and ordered < non-NULL values. -func SplitNullsFromRange(r sql.Range) ([]sql.Range, error) { - res := []sql.Range{{}} +func SplitNullsFromRange(r sql.MySQLRange) ([]sql.MySQLRange, error) { + res := []sql.MySQLRange{{}} for _, rce := range r { if _, ok := rce.LowerBound.(sql.BelowNull); ok { @@ -1395,8 +1399,8 @@ func SplitNullsFromRange(r sql.Range) ([]sql.Range, error) { } // SplitNullsFromRanges splits nulls from ranges. -func SplitNullsFromRanges(rs []sql.Range) ([]sql.Range, error) { - var ret []sql.Range +func SplitNullsFromRanges(rs []sql.MySQLRange) ([]sql.MySQLRange, error) { + var ret []sql.MySQLRange for _, r := range rs { nr, err := SplitNullsFromRange(r) if err != nil { @@ -1412,7 +1416,11 @@ func SplitNullsFromRanges(rs []sql.Range) ([]sql.Range, error) { // to convert. 
func LookupToPointSelectStr(lookup sql.IndexLookup) ([]string, bool) { var selects []string - for _, r := range lookup.Ranges { + mysqlRanges, ok := lookup.Ranges.(sql.MySQLRangeCollection) + if !ok { + return nil, false + } + for _, r := range mysqlRanges { if len(r) != 1 { return nil, false } diff --git a/go/libraries/doltcore/sqle/index/dolt_index_test.go b/go/libraries/doltcore/sqle/index/dolt_index_test.go index 03447d9f59..044bf946fb 100644 --- a/go/libraries/doltcore/sqle/index/dolt_index_test.go +++ b/go/libraries/doltcore/sqle/index/dolt_index_test.go @@ -1058,7 +1058,7 @@ func TestDoltIndexBetween(t *testing.T) { expectedRows := convertSqlRowToInt64(test.expectedRows) exprs := idx.Expressions() - sqlIndex := sql.NewIndexBuilder(idx) + sqlIndex := sql.NewMySQLIndexBuilder(idx) for i := range test.greaterThanOrEqual { sqlIndex = sqlIndex.GreaterOrEqual(ctx, exprs[i], test.greaterThanOrEqual[i]).LessOrEqual(ctx, exprs[i], test.lessThanOrEqual[i]) } @@ -1294,7 +1294,7 @@ func requireUnorderedRowsEqual(t *testing.T, s sql.Schema, rows1, rows2 []sql.Ro func testDoltIndex(t *testing.T, ctx *sql.Context, root doltdb.RootValue, keys []interface{}, expectedRows []sql.Row, idx index.DoltIndex, cmp indexComp) { ctx = sql.NewEmptyContext() exprs := idx.Expressions() - builder := sql.NewIndexBuilder(idx) + builder := sql.NewMySQLIndexBuilder(idx) for i, key := range keys { switch cmp { case indexComp_Eq: @@ -1460,7 +1460,7 @@ func convertSqlRowToInt64(sqlRows []sql.Row) []sql.Row { func TestSplitNullsFromRange(t *testing.T) { t.Run("EmptyRange", func(t *testing.T) { - r, err := index.SplitNullsFromRange(sql.Range{}) + r, err := index.SplitNullsFromRange(sql.MySQLRange{}) assert.NoError(t, err) assert.NotNil(t, r) assert.Len(t, r, 1) @@ -1468,7 +1468,7 @@ func TestSplitNullsFromRange(t *testing.T) { }) t.Run("ThreeColumnNoNullsRange", func(t *testing.T) { - r := sql.Range{sql.LessThanRangeColumnExpr(10, types.Int8), sql.GreaterThanRangeColumnExpr(16, types.Int8), sql.NotNullRangeColumnExpr(types.Int8)} + r := sql.MySQLRange{sql.LessThanRangeColumnExpr(10, types.Int8), sql.GreaterThanRangeColumnExpr(16, types.Int8), sql.NotNullRangeColumnExpr(types.Int8)} rs, err := index.SplitNullsFromRange(r) assert.NoError(t, err) assert.NotNil(t, rs) @@ -1478,7 +1478,7 @@ func TestSplitNullsFromRange(t *testing.T) { }) t.Run("LastColumnOnlyNull", func(t *testing.T) { - r := sql.Range{sql.LessThanRangeColumnExpr(10, types.Int8), sql.GreaterThanRangeColumnExpr(16, types.Int8), sql.NullRangeColumnExpr(types.Int8)} + r := sql.MySQLRange{sql.LessThanRangeColumnExpr(10, types.Int8), sql.GreaterThanRangeColumnExpr(16, types.Int8), sql.NullRangeColumnExpr(types.Int8)} rs, err := index.SplitNullsFromRange(r) assert.NoError(t, err) assert.NotNil(t, rs) @@ -1488,7 +1488,7 @@ func TestSplitNullsFromRange(t *testing.T) { }) t.Run("LastColumnAll", func(t *testing.T) { - r := sql.Range{sql.LessThanRangeColumnExpr(10, types.Int8), sql.GreaterThanRangeColumnExpr(16, types.Int8), sql.AllRangeColumnExpr(types.Int8)} + r := sql.MySQLRange{sql.LessThanRangeColumnExpr(10, types.Int8), sql.GreaterThanRangeColumnExpr(16, types.Int8), sql.AllRangeColumnExpr(types.Int8)} rs, err := index.SplitNullsFromRange(r) assert.NoError(t, err) assert.NotNil(t, rs) @@ -1502,7 +1502,7 @@ func TestSplitNullsFromRange(t *testing.T) { }) t.Run("FirstColumnAll", func(t *testing.T) { - r := sql.Range{sql.AllRangeColumnExpr(types.Int8), sql.LessThanRangeColumnExpr(10, types.Int8), sql.GreaterThanRangeColumnExpr(16, types.Int8)} + r := 
sql.MySQLRange{sql.AllRangeColumnExpr(types.Int8), sql.LessThanRangeColumnExpr(10, types.Int8), sql.GreaterThanRangeColumnExpr(16, types.Int8)}
 		rs, err := index.SplitNullsFromRange(r)
 		assert.NoError(t, err)
 		assert.NotNil(t, rs)
@@ -1516,7 +1516,7 @@ func TestSplitNullsFromRange(t *testing.T) {
 	})

 	t.Run("AllColumnAll", func(t *testing.T) {
-		r := sql.Range{sql.AllRangeColumnExpr(types.Int8), sql.AllRangeColumnExpr(types.Int8), sql.AllRangeColumnExpr(types.Int8)}
+		r := sql.MySQLRange{sql.AllRangeColumnExpr(types.Int8), sql.AllRangeColumnExpr(types.Int8), sql.AllRangeColumnExpr(types.Int8)}
 		rs, err := index.SplitNullsFromRange(r)
 		assert.NoError(t, err)
 		assert.NotNil(t, rs)
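The tests above exercise the renamed sql.MySQLRange type with the go-mysql-server range-column helpers. As a standalone illustration (not part of the diff, and assuming the go-mysql-server revision pinned by this patch), the same call looks like this outside the test harness:

package main

import (
	"fmt"

	"github.com/dolthub/go-mysql-server/sql"
	"github.com/dolthub/go-mysql-server/sql/types"

	"github.com/dolthub/dolt/go/libraries/doltcore/sqle/index"
)

func main() {
	// First column unconstrained, second < 10, third > 16.
	r := sql.MySQLRange{
		sql.AllRangeColumnExpr(types.Int8),
		sql.LessThanRangeColumnExpr(10, types.Int8),
		sql.GreaterThanRangeColumnExpr(16, types.Int8),
	}
	// The unconstrained column admits NULL, so SplitNullsFromRange returns the
	// NULL portion of that column as its own range alongside the NOT NULL portion.
	split, err := index.SplitNullsFromRange(r)
	if err != nil {
		panic(err)
	}
	fmt.Println(len(split))
}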
diff --git a/go/libraries/doltcore/sqle/index/doltgres_iter.go b/go/libraries/doltcore/sqle/index/doltgres_iter.go
new file mode 100644
index 0000000000..222af449eb
--- /dev/null
+++ b/go/libraries/doltcore/sqle/index/doltgres_iter.go
@@ -0,0 +1,300 @@
+// Copyright 2024 Dolthub, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package index
+
+import (
+	"context"
+	"encoding/binary"
+	"fmt"
+	"io"
+	"sort"
+	"strings"
+
+	"github.com/dolthub/go-mysql-server/sql"
+
+	"github.com/dolthub/dolt/go/store/prolly"
+	"github.com/dolthub/dolt/go/store/prolly/tree"
+	"github.com/dolthub/dolt/go/store/val"
+)
+
+// DoltgresRangeCollection is used by Doltgres as the range collection.
+type DoltgresRangeCollection []DoltgresRange
+
+// DoltgresRange represents a range that is used by Doltgres.
+type DoltgresRange struct {
+	StartExpressions  []sql.Expression // StartExpressions are used to find the starting point for the iterator.
+	StopExpressions   []sql.Expression // StopExpressions are used to find the stopping point for the iterator.
+	FilterExpressions []sql.Expression // FilterExpressions are used to determine whether a row should be returned.
+	PreciseMatch      bool             // PreciseMatch is true when a higher-level filter is unnecessary.
+	reverse           bool             // reverse states whether the start and stop points should flip, reversing iteration.
+}
+
+// DoltgresPartitionIter is an iterator that returns DoltgresPartition.
+type DoltgresPartitionIter struct {
+	partitions []DoltgresPartition
+	curr       int
+}
+
+// DoltgresPartition is analogous to a contiguous iteration over an index. These are used to create the normal range
+// iterators.
+type DoltgresPartition struct {
+	idx  *doltIndex
+	rang DoltgresRange
+	curr int
+}
+
+// DoltgresFilterIter is a special map iterator that is able to perform filter checks without needing to delay the check
+// to a higher level, which will bypass reading from the primary table. This mirrors the Postgres behavior.
+type DoltgresFilterIter struct {
+	sqlCtx  *sql.Context
+	inner   prolly.MapIter
+	keyDesc val.TupleDesc
+	ns      tree.NodeStore
+	row     sql.Row
+	filters []sql.Expression
+}
+
+var _ sql.RangeCollection = DoltgresRangeCollection{}
+var _ sql.Range = DoltgresRange{}
+var _ sql.PartitionIter = (*DoltgresPartitionIter)(nil)
+var _ sql.Partition = DoltgresPartition{}
+var _ prolly.MapIter = (*DoltgresFilterIter)(nil)
+
+// Equals implements the sql.RangeCollection interface.
+func (ranges DoltgresRangeCollection) Equals(other sql.RangeCollection) (bool, error) {
+	otherCollection, ok := other.(DoltgresRangeCollection)
+	if !ok {
+		return false, nil
+	}
+	if len(ranges) != len(otherCollection) {
+		return false, nil
+	}
+	for i := range ranges {
+		if ok, err := ranges[i].Equals(otherCollection[i]); err != nil || !ok {
+			return ok, err
+		}
+	}
+	return true, nil
+}
+
+// Len implements the sql.RangeCollection interface.
+func (ranges DoltgresRangeCollection) Len() int {
+	return len(ranges)
+}
+
+// DebugString implements the sql.RangeCollection interface.
+func (ranges DoltgresRangeCollection) DebugString() string {
+	return ranges.String()
+}
+
+// String implements the sql.RangeCollection interface.
+func (ranges DoltgresRangeCollection) String() string {
+	sb := strings.Builder{}
+	sb.WriteByte('[')
+	for i, rang := range ranges {
+		if i != 0 {
+			sb.WriteString(", ")
+		}
+		sb.WriteString(rang.String())
+	}
+	sb.WriteByte(']')
+	return sb.String()
+}
+
+// ToRanges implements the sql.RangeCollection interface.
+func (ranges DoltgresRangeCollection) ToRanges() []sql.Range {
+	slice := make([]sql.Range, len(ranges))
+	for i := range ranges {
+		slice[i] = ranges[i]
+	}
+	return slice
+}
+
+// Equals implements the sql.Range interface.
+func (d DoltgresRange) Equals(other sql.Range) (bool, error) {
+	_, ok := other.(DoltgresRange)
+	if !ok {
+		return false, nil
+	}
+	// TODO: this isn't being called for now, so we can just return true and implement it later
+	return true, nil
+}
+
+// String implements the sql.Range interface.
+func (d DoltgresRange) String() string {
+	// TODO: implement me
+	return "DoltgresRange"
+}
+
+// DebugString implements the sql.Range interface.
+func (d DoltgresRange) DebugString() string {
+	return d.String()
+}
+
+// Close implements the sql.PartitionIter interface.
+func (iter *DoltgresPartitionIter) Close(*sql.Context) error {
+	return nil
+}
+
+// Next implements the sql.PartitionIter interface.
+func (iter *DoltgresPartitionIter) Next(*sql.Context) (sql.Partition, error) {
+	if iter.curr >= len(iter.partitions) {
+		return nil, io.EOF
+	}
+	iter.curr++
+	return iter.partitions[iter.curr-1], nil
+}
+
+// Key implements the sql.Partition interface.
+func (partition DoltgresPartition) Key() []byte {
+	var bytes [4]byte
+	binary.BigEndian.PutUint32(bytes[:], uint32(partition.curr))
+	return bytes[:]
+}
+// Next implements the prolly.MapIter interface.
+func (iter *DoltgresFilterIter) Next(ctx context.Context) (val.Tuple, val.Tuple, error) {
+OuterLoop:
+	for {
+		k, v, err := iter.inner.Next(ctx)
+		if err != nil {
+			return k, v, err
+		}
+		if err = doltgresMapSearchKeyToRow(ctx, k, iter.keyDesc, iter.ns, iter.row); err != nil {
+			return k, v, err
+		}
+		for _, filterExpr := range iter.filters {
+			result, err := filterExpr.Eval(iter.sqlCtx, iter.row)
+			if err != nil {
+				return k, v, err
+			}
+			if !(result.(bool)) {
+				continue OuterLoop
+			}
+		}
+		return k, v, err
+	}
+}
+
+// NewDoltgresPartitionIter creates a new sql.PartitionIter for Doltgres indexing.
+func NewDoltgresPartitionIter(ctx *sql.Context, lookup sql.IndexLookup) (sql.PartitionIter, error) {
+	idx := lookup.Index.(*doltIndex)
+	ranges, ok := lookup.Ranges.(DoltgresRangeCollection)
+	if !ok {
+		return nil, fmt.Errorf("Doltgres partition iter expected Doltgres ranges")
+	}
+	partitions := make([]DoltgresPartition, len(ranges))
+	for i, rang := range ranges {
+		rang.reverse = lookup.IsReverse
+		partitions[i] = DoltgresPartition{
+			idx:  idx,
+			rang: rang,
+			curr: i,
+		}
+	}
+	return &DoltgresPartitionIter{
+		partitions: partitions,
+		curr:       0,
+	}, nil
+}
+
+// doltgresProllyMapIterator returns a map iterator, which handles the contiguous iteration over the underlying map that
+// stores an index's data. This also handles filter expressions, if any are present.
+func doltgresProllyMapIterator(ctx *sql.Context, keyDesc val.TupleDesc, ns tree.NodeStore, root tree.Node, rang DoltgresRange) (prolly.MapIter, error) {
+	searchRow := make(sql.Row, len(keyDesc.Types))
+	var findStartErr error
+	findStart := func(nd tree.Node) int {
+		return sort.Search(nd.Count(), func(i int) bool {
+			key := val.Tuple(nd.GetKey(i))
+			if err := doltgresMapSearchKeyToRow(ctx, key, keyDesc, ns, searchRow); err != nil {
+				findStartErr = err
+			} else {
+				for _, expr := range rang.StartExpressions {
+					res, err := expr.Eval(ctx, searchRow)
+					if err != nil {
+						findStartErr = err
+					} else if !(res.(bool)) {
+						return false
+					}
+				}
+				return true
+			}
+			return false
+		})
+	}
+	var findStopErr error
+	findStop := func(nd tree.Node) (idx int) {
+		return sort.Search(nd.Count(), func(i int) bool {
+			key := val.Tuple(nd.GetKey(i))
+			if err := doltgresMapSearchKeyToRow(ctx, key, keyDesc, ns, searchRow); err != nil {
+				findStopErr = err
+			} else {
+				for _, expr := range rang.StopExpressions {
+					res, err := expr.Eval(ctx, searchRow)
+					if err != nil {
+						findStopErr = err
+					} else if res.(bool) {
+						return true
+					}
+				}
+			}
+			return false
+		})
+	}
+
+	var indexIter prolly.MapIter
+	var err error
+	if rang.reverse {
+		indexIter, err = tree.ReverseOrderedTreeIterFromCursors[val.Tuple, val.Tuple](ctx, root, ns, findStart, findStop)
+		if err != nil {
+			return nil, err
+		}
+	} else {
+		indexIter, err = tree.OrderedTreeIterFromCursors[val.Tuple, val.Tuple](ctx, root, ns, findStart, findStop)
+		if err != nil {
+			return nil, err
+		}
+	}
+	if findStartErr != nil {
+		return nil, findStartErr
+	}
+	if findStopErr != nil {
+		return nil, findStopErr
+	}
+	if len(rang.FilterExpressions) == 0 {
+		return indexIter, nil
+	} else {
+		return &DoltgresFilterIter{
+			sqlCtx:  ctx,
+			inner:   indexIter,
+			keyDesc: keyDesc,
+			ns:      ns,
+			row:     searchRow,
+			filters: rang.FilterExpressions,
+		}, nil
+	}
+}
+
+// doltgresMapSearchKeyToRow writes the given key into the given row. As all used functions are expressions, they expect
+// a sql.Row, and we must therefore convert the key tuple into the format expected of the expression.
+func doltgresMapSearchKeyToRow(ctx context.Context, key val.Tuple, keyDesc val.TupleDesc, ns tree.NodeStore, row sql.Row) (err error) {
+	for i := range row {
+		row[i], err = tree.GetField(ctx, keyDesc, i, key, ns)
+		if err != nil {
+			return err
+		}
+	}
+	return
+}
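For orientation (not part of the diff): a DoltgresRange describes a scan purely with SQL expressions — iteration starts at the first key where every StartExpression is true, stops at the first key where any StopExpression is true, and FilterExpressions are applied per key by DoltgresFilterIter. A hypothetical caller, using go-mysql-server expression helpers and invented column names and literals, might assemble a lookup like this:

package example

import (
	"github.com/dolthub/go-mysql-server/sql"
	"github.com/dolthub/go-mysql-server/sql/expression"
	"github.com/dolthub/go-mysql-server/sql/types"

	"github.com/dolthub/dolt/go/libraries/doltcore/sqle/index"
)

// buildLookup sketches a scan over a single-column index: start at col >= 10,
// stop once col >= 100, and keep only keys where col is in (12, 20, 42).
// The index passed in must be a Dolt index for NewDoltgresPartitionIter to accept it.
func buildLookup(idx sql.Index) sql.IndexLookup {
	col := expression.NewGetField(0, types.Int64, "col", false)
	rng := index.DoltgresRange{
		// Iteration begins at the first key for which every start expression is true.
		StartExpressions: []sql.Expression{
			expression.NewGreaterThanOrEqual(col, expression.NewLiteral(int64(10), types.Int64)),
		},
		// Iteration ends at the first key for which any stop expression is true.
		StopExpressions: []sql.Expression{
			expression.NewGreaterThanOrEqual(col, expression.NewLiteral(int64(100), types.Int64)),
		},
		// Filters run against each key tuple, so the primary table is never consulted.
		FilterExpressions: []sql.Expression{
			expression.NewInTuple(col, expression.NewTuple(
				expression.NewLiteral(int64(12), types.Int64),
				expression.NewLiteral(int64(20), types.Int64),
				expression.NewLiteral(int64(42), types.Int64),
			)),
		},
		// The expressions fully describe the predicate, so no higher-level filter is needed.
		PreciseMatch: true,
	}
	return sql.IndexLookup{
		Index:  idx,
		Ranges: index.DoltgresRangeCollection{rng},
	}
}

NewRangePartitionIter (next hunk) recognizes the DoltgresRangeCollection and hands such a lookup to NewDoltgresPartitionIter, so it flows through the same partition machinery as MySQL-style ranges.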
diff --git a/go/libraries/doltcore/sqle/index/index_reader.go b/go/libraries/doltcore/sqle/index/index_reader.go
index e30a6ad3aa..ac8aa55acd 100644
--- a/go/libraries/doltcore/sqle/index/index_reader.go
+++ b/go/libraries/doltcore/sqle/index/index_reader.go
@@ -34,10 +34,14 @@ import (

 func ProllyRangesForIndex(ctx *sql.Context, index sql.Index, ranges sql.RangeCollection) ([]prolly.Range, error) {
 	idx := index.(*doltIndex)
-	return idx.prollyRanges(ctx, idx.ns, ranges...)
+	return idx.prollyRanges(ctx, idx.ns, ranges.(sql.MySQLRangeCollection)...)
 }

 func RowIterForIndexLookup(ctx *sql.Context, t DoltTableable, lookup sql.IndexLookup, pkSch sql.PrimaryKeySchema, columns []uint64) (sql.RowIter, error) {
+	mysqlRanges, ok := lookup.Ranges.(sql.MySQLRangeCollection)
+	if !ok {
+		return nil, fmt.Errorf("expected MySQL ranges while creating row iter")
+	}
 	idx := lookup.Index.(*doltIndex)
 	durableState, err := idx.getDurableState(ctx, t)
 	if err != nil {
@@ -45,7 +49,7 @@ func RowIterForIndexLookup(ctx *sql.Context, t DoltTableable, lookup sql.IndexLo
 	}

 	if types.IsFormat_DOLT(idx.Format()) {
-		prollyRanges, err := idx.prollyRanges(ctx, idx.ns, lookup.Ranges...)
+		prollyRanges, err := idx.prollyRanges(ctx, idx.ns, mysqlRanges...)
 		if len(prollyRanges) > 1 {
 			return nil, fmt.Errorf("expected a single index range")
 		}
@@ -54,7 +58,7 @@
 		return RowIterForProllyRange(ctx, idx, prollyRanges[0], pkSch, columns, durableState)
 	} else {
-		nomsRanges, err := idx.nomsRanges(ctx, lookup.Ranges...)
+		nomsRanges, err := idx.nomsRanges(ctx, mysqlRanges...)
 		if err != nil {
 			return nil, err
 		}
@@ -70,7 +74,7 @@
 	if sql.IsKeyless(pkSch.Schema) {
 		// in order to resolve row cardinality, keyless indexes must always perform
 		// an indirect lookup through the clustered index.
-		return newProllyKeylessIndexIter(ctx, idx, r, pkSch, projections, durableState.Primary, durableState.Secondary)
+		return newProllyKeylessIndexIter(ctx, idx, r, nil, pkSch, projections, durableState.Primary, durableState.Secondary)
 	}

 	covers := idx.coversColumns(durableState, projections)
@@ -101,6 +105,10 @@ type IndexLookupKeyIterator interface {
 }

 func NewRangePartitionIter(ctx *sql.Context, t DoltTableable, lookup sql.IndexLookup, isDoltFmt bool) (sql.PartitionIter, error) {
+	if _, ok := lookup.Ranges.(DoltgresRangeCollection); ok {
+		return NewDoltgresPartitionIter(ctx, lookup)
+	}
+	mysqlRanges := lookup.Ranges.(sql.MySQLRangeCollection)
 	idx := lookup.Index.(*doltIndex)
 	if lookup.IsPointLookup && isDoltFmt {
 		return newPointPartitionIter(ctx, lookup, idx)
@@ -110,9 +118,9 @@ func NewRangePartitionIter(ctx *sql.Context, t DoltTableable, lookup sql.IndexLo
 	var nomsRanges []*noms.ReadRange
 	var err error
 	if isDoltFmt {
-		prollyRanges, err = idx.prollyRanges(ctx, idx.ns, lookup.Ranges...)
+		prollyRanges, err = idx.prollyRanges(ctx, idx.ns, mysqlRanges...)
 	} else {
-		nomsRanges, err = idx.nomsRanges(ctx, lookup.Ranges...)
+		nomsRanges, err = idx.nomsRanges(ctx, mysqlRanges...)
} if err != nil { return nil, err @@ -127,7 +135,7 @@ func NewRangePartitionIter(ctx *sql.Context, t DoltTableable, lookup sql.IndexLo } func newPointPartitionIter(ctx *sql.Context, lookup sql.IndexLookup, idx *doltIndex) (sql.PartitionIter, error) { - prollyRanges, err := idx.prollyRanges(ctx, idx.ns, lookup.Ranges[0]) + prollyRanges, err := idx.prollyRanges(ctx, idx.ns, lookup.Ranges.(sql.MySQLRangeCollection)[0]) if err != nil { return nil, err } @@ -405,6 +413,8 @@ func (ib *baseIndexImplBuilder) rangeIter(ctx *sql.Context, part sql.Partition) } else { return ib.sec.IterRange(ctx, p.prollyRange) } + case DoltgresPartition: + return doltgresProllyMapIterator(ctx, ib.secKd, ib.ns, ib.sec.Node(), p.rang) default: panic(fmt.Sprintf("unexpected prolly partition type: %T", part)) } @@ -425,6 +435,7 @@ func NewSequenceRangeIter(ctx context.Context, ib IndexScanBuilder, ranges []pro if len(ranges) == 0 { return &strictLookupIter{}, nil } + // TODO: probably need to do something with Doltgres ranges here? cur, err := ib.NewRangeMapIter(ctx, ranges[0], reverse) if err != nil || len(ranges) < 2 { return cur, err @@ -664,13 +675,16 @@ func (i *keylessMapIter) Next(ctx context.Context) (val.Tuple, val.Tuple, error) // NewPartitionRowIter implements IndexScanBuilder func (ib *keylessIndexImplBuilder) NewPartitionRowIter(ctx *sql.Context, part sql.Partition) (sql.RowIter, error) { var prollyRange prolly.Range + var doltgresRange *DoltgresRange switch p := part.(type) { case rangePartition: prollyRange = p.prollyRange case pointPartition: prollyRange = p.r + case DoltgresPartition: + doltgresRange = &p.rang } - return newProllyKeylessIndexIter(ctx, ib.idx, prollyRange, ib.sch, ib.projections, ib.s.Primary, ib.s.Secondary) + return newProllyKeylessIndexIter(ctx, ib.idx, prollyRange, doltgresRange, ib.sch, ib.projections, ib.s.Primary, ib.s.Secondary) } func (ib *keylessIndexImplBuilder) NewSecondaryIter(strict bool, cnt int, nullSafe []bool) SecondaryLookupIterGen { diff --git a/go/libraries/doltcore/sqle/index/prolly_index_iter.go b/go/libraries/doltcore/sqle/index/prolly_index_iter.go index 29cfba22bd..7803b52a48 100644 --- a/go/libraries/doltcore/sqle/index/prolly_index_iter.go +++ b/go/libraries/doltcore/sqle/index/prolly_index_iter.go @@ -296,14 +296,24 @@ func newProllyKeylessIndexIter( ctx *sql.Context, idx DoltIndex, rng prolly.Range, + doltgresRange *DoltgresRange, pkSch sql.PrimaryKeySchema, projections []uint64, rows, dsecondary durable.Index, ) (prollyKeylessIndexIter, error) { secondary := durable.ProllyMapFromIndex(dsecondary) - indexIter, err := secondary.IterRange(ctx, rng) - if err != nil { - return prollyKeylessIndexIter{}, err + var indexIter prolly.MapIter + var err error + if doltgresRange == nil { + indexIter, err = secondary.IterRange(ctx, rng) + if err != nil { + return prollyKeylessIndexIter{}, err + } + } else { + indexIter, err = doltgresProllyMapIterator(ctx, secondary.KeyDesc(), secondary.NodeStore(), secondary.Tuples().Root, *doltgresRange) + if err != nil { + return prollyKeylessIndexIter{}, err + } } clustered := durable.ProllyMapFromIndex(rows) diff --git a/go/libraries/doltcore/sqle/index/testutils.go b/go/libraries/doltcore/sqle/index/testutils.go index 8a4b1cf1c8..80a17ec63f 100644 --- a/go/libraries/doltcore/sqle/index/testutils.go +++ b/go/libraries/doltcore/sqle/index/testutils.go @@ -30,7 +30,7 @@ func OpenRange(tpl1, tpl2 types.Tuple) *noms.ReadRange { return CustomRange(tpl1, tpl2, sql.Open, sql.Open) } -func CustomRange(tpl1, tpl2 types.Tuple, bt1, bt2 
sql.RangeBoundType) *noms.ReadRange { +func CustomRange(tpl1, tpl2 types.Tuple, bt1, bt2 sql.MySQLRangeBoundType) *noms.ReadRange { var nrc nomsRangeCheck _ = tpl1.IterFields(func(tupleIndex uint64, tupleVal types.Value) (stop bool, err error) { if tupleIndex%2 == 0 { @@ -203,12 +203,12 @@ func ReadRangesEqual(nr1, nr2 *noms.ReadRange) bool { } func NomsRangesFromIndexLookup(ctx *sql.Context, lookup sql.IndexLookup) ([]*noms.ReadRange, error) { - return lookup.Index.(*doltIndex).nomsRanges(ctx, lookup.Ranges...) + return lookup.Index.(*doltIndex).nomsRanges(ctx, lookup.Ranges.(sql.MySQLRangeCollection)...) } func ProllyRangesFromIndexLookup(ctx *sql.Context, lookup sql.IndexLookup) ([]prolly.Range, error) { idx := lookup.Index.(*doltIndex) - return idx.prollyRanges(ctx, idx.ns, lookup.Ranges...) + return idx.prollyRanges(ctx, idx.ns, lookup.Ranges.(sql.MySQLRangeCollection)...) } func DoltIndexFromSqlIndex(idx sql.Index) DoltIndex { diff --git a/go/libraries/doltcore/sqle/procedures_table.go b/go/libraries/doltcore/sqle/procedures_table.go index 89e49ff83c..d90c138b7e 100644 --- a/go/libraries/doltcore/sqle/procedures_table.go +++ b/go/libraries/doltcore/sqle/procedures_table.go @@ -320,9 +320,9 @@ func DoltProceduresGetAll(ctx *sql.Context, db Database, procedureName string) ( var lookup sql.IndexLookup if procedureName == "" { - lookup, err = sql.NewIndexBuilder(idx).IsNotNull(ctx, nameExpr).Build(ctx) + lookup, err = sql.NewMySQLIndexBuilder(idx).IsNotNull(ctx, nameExpr).Build(ctx) } else { - lookup, err = sql.NewIndexBuilder(idx).Equals(ctx, nameExpr, procedureName).Build(ctx) + lookup, err = sql.NewMySQLIndexBuilder(idx).Equals(ctx, nameExpr, procedureName).Build(ctx) } if err != nil { return nil, err @@ -456,7 +456,7 @@ func DoltProceduresGetDetails(ctx *sql.Context, tbl *WritableDoltTable, name str return sql.StoredProcedureDetails{}, false, fmt.Errorf("could not find primary key index on system table `%s`", doltdb.ProceduresTableName) } - indexLookup, err := sql.NewIndexBuilder(fragNameIndex).Equals(ctx, fragNameIndex.Expressions()[0], name).Build(ctx) + indexLookup, err := sql.NewMySQLIndexBuilder(fragNameIndex).Equals(ctx, fragNameIndex.Expressions()[0], name).Build(ctx) if err != nil { return sql.StoredProcedureDetails{}, false, err }
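Taken together, the integration pattern the patch applies is: sql.IndexLookup.Ranges is now an interface-valued collection, and Dolt-side code type-switches on the concrete collection before building iterators — Doltgres collections go through NewDoltgresPartitionIter, MySQL collections through the existing prolly/noms paths. A condensed sketch of that dispatch (illustrative only; describeLookup is not a function in the patch):

package example

import (
	"fmt"

	"github.com/dolthub/go-mysql-server/sql"

	"github.com/dolthub/dolt/go/libraries/doltcore/sqle/index"
)

// describeLookup mirrors the dispatch NewRangePartitionIter now performs:
// expression-driven Doltgres ranges take the new partition path, while
// MySQL range collections continue down the prolly/noms range path.
func describeLookup(lookup sql.IndexLookup) (string, error) {
	switch ranges := lookup.Ranges.(type) {
	case index.DoltgresRangeCollection:
		return fmt.Sprintf("doltgres lookup over %d range(s)", ranges.Len()), nil
	case sql.MySQLRangeCollection:
		return fmt.Sprintf("mysql lookup over %d range(s)", len(ranges)), nil
	default:
		return "", fmt.Errorf("unsupported range collection %T", lookup.Ranges)
	}
}

Callers that previously indexed lookup.Ranges directly now need the sql.MySQLRangeCollection assertion first, which is exactly what the commit_diff_table.go, index_reader.go, and testutils.go hunks add.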