Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support for using TEXT fields in secondary indexes and UNIQUE constraints #829

Merged
merged 18 commits into from
Oct 16, 2024
Merged
Show file tree
Hide file tree
Changes from 15 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ require (
github.com/dolthub/dolt/go/gen/proto/dolt/services/eventsapi v0.0.0-20240827111219-e4bb9ca3442d
github.com/dolthub/flatbuffers/v23 v23.3.3-dh.2
github.com/dolthub/go-icu-regex v0.0.0-20240916130659-0118adc6b662
github.com/dolthub/go-mysql-server v0.18.2-0.20241015190154-54bd6d6e1ce8
github.com/dolthub/go-mysql-server v0.18.2-0.20241015212643-cbdedd45d0c2
github.com/dolthub/sqllogictest/go v0.0.0-20240618184124-ca47f9354216
github.com/dolthub/vitess v0.0.0-20241010201417-9d4f54b29ccc
github.com/fatih/color v1.13.0
Expand Down
2 changes: 2 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -226,6 +226,8 @@ github.com/dolthub/go-icu-regex v0.0.0-20240916130659-0118adc6b662 h1:aC17hZD6iw
github.com/dolthub/go-icu-regex v0.0.0-20240916130659-0118adc6b662/go.mod h1:KPUcpx070QOfJK1gNe0zx4pA5sicIK1GMikIGLKC168=
github.com/dolthub/go-mysql-server v0.18.2-0.20241015190154-54bd6d6e1ce8 h1:opC/9GtHMpPf5v0eRdngp166LcJTTyQ+YZfyjAchHaY=
github.com/dolthub/go-mysql-server v0.18.2-0.20241015190154-54bd6d6e1ce8/go.mod h1:Z8tket+3sYcU3d4yW90Ggld2d+C2DUgnpB8cBP0+GvI=
github.com/dolthub/go-mysql-server v0.18.2-0.20241015212643-cbdedd45d0c2 h1:ABhQcbbGX/KzwOk6SUHzsrwrZAsktvpMkURIeiRJLd8=
github.com/dolthub/go-mysql-server v0.18.2-0.20241015212643-cbdedd45d0c2/go.mod h1:Z8tket+3sYcU3d4yW90Ggld2d+C2DUgnpB8cBP0+GvI=
github.com/dolthub/gozstd v0.0.0-20240423170813-23a2903bca63 h1:OAsXLAPL4du6tfbBgK0xXHZkOlos63RdKYS3Sgw/dfI=
github.com/dolthub/gozstd v0.0.0-20240423170813-23a2903bca63/go.mod h1:lV7lUeuDhH5thVGDCKXbatwKy2KW80L4rMT46n+Y2/Q=
github.com/dolthub/ishell v0.0.0-20240701202509-2b217167d718 h1:lT7hE5k+0nkBdj/1UOSFwjWpNxf+LCApbRHgnCA17XE=
Expand Down
121 changes: 121 additions & 0 deletions server/analyzer/add_implicit_prefix_lengths.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
// Copyright 2024 Dolthub, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package analyzer

import (
"fmt"
"strings"

"github.com/dolthub/go-mysql-server/sql"
"github.com/dolthub/go-mysql-server/sql/analyzer"
"github.com/dolthub/go-mysql-server/sql/plan"
"github.com/dolthub/go-mysql-server/sql/transform"

"github.com/dolthub/doltgresql/server/types"
)

// defaultIndexPrefixLength is the index prefix length that this analyzer rule applies automatically to TEXT columns
// in secondary indexes. It is only filled in for indexed TEXT columns whose declared prefix length is zero; a
// non-zero length already present on an index column is left untouched.
const defaultIndexPrefixLength = 255
fulghum marked this conversation as resolved.
Show resolved Hide resolved

// AddImplicitPrefixLengths searches the |node| tree for any nodes creating an index, and plugs in a default index
// prefix length for any TEXT columns in those new indexes. This rule is intended to be used for Postgres compatibility,
// since Postgres does not require specifying prefix lengths for TEXT columns.
func AddImplicitPrefixLengths(_ *sql.Context, _ *analyzer.Analyzer, node sql.Node, _ *plan.Scope, _ analyzer.RuleSelector, _ *sql.QueryFlags) (sql.Node, transform.TreeIdentity, error) {
var targetSchema sql.Schema
transform.Inspect(node, func(node sql.Node) bool {
if st, ok := node.(sql.SchemaTarget); ok {
targetSchema = st.TargetSchema().Copy()
return false
}
return true
})

// Recurse through the node tree to fill in prefix lengths. Note that some statements come in as Block nodes
// that contain multiple nodes, so we need to recurse through and handle all of them.
return transform.Node(node, func(node sql.Node) (sql.Node, transform.TreeIdentity, error) {
switch node := node.(type) {
case *plan.AddColumn:
// For any AddColumn nodes, we need to update the target schema with the column being added, otherwise
// we won't be able to find those columns if they are also being added to a secondary index.
var err error
targetSchema, err = analyzer.ValidateAddColumn(targetSchema, node)
if err != nil {
return nil, transform.SameTree, err
}

case *plan.CreateTable:
newIndexes := make([]*sql.IndexDef, len(node.Indexes()))
for i := range node.Indexes() {
copy := *node.Indexes()[i]
newIndexes[i] = &copy
}
indexModified := false
for _, index := range newIndexes {
targetSchema := node.TargetSchema()
colMap := schToColMap(targetSchema)

for i := range index.Columns {
col, ok := colMap[strings.ToLower(index.Columns[i].Name)]
if !ok {
return nil, false, fmt.Errorf("indexed column %s not found in schema", index.Columns[i].Name)
}
if _, ok := col.Type.(types.TextType); ok && index.Columns[i].Length == 0 {
fulghum marked this conversation as resolved.
Show resolved Hide resolved
index.Columns[i].Length = defaultIndexPrefixLength
indexModified = true
}
}
}
if indexModified {
newNode, err := node.WithIndexDefs(newIndexes)
return newNode, transform.NewTree, err
}

case *plan.AlterIndex:
if node.Action == plan.IndexAction_Create {
colMap := schToColMap(targetSchema)
newColumns := make([]sql.IndexColumn, len(node.Columns))
for i := range node.Columns {
copy := node.Columns[i]
newColumns[i] = copy
}
indexModified := false
for i := range newColumns {
col, ok := colMap[strings.ToLower(newColumns[i].Name)]
if !ok {
return nil, false, fmt.Errorf("indexed column %s not found in schema", newColumns[i].Name)
}
if _, ok := col.Type.(types.TextType); ok && newColumns[i].Length == 0 {
newColumns[i].Length = defaultIndexPrefixLength
indexModified = true
}
}
if indexModified {
newNode, err := node.WithColumns(newColumns)
return newNode, transform.NewTree, err
}
}
}
return node, transform.SameTree, nil
})
}

// schToColMap builds a lookup table from lower-cased column name to column for every column in |sch|, allowing
// case-insensitive resolution of column names. If two columns share a lower-cased name, the later one wins.
func schToColMap(sch sql.Schema) map[string]*sql.Column {
	byName := make(map[string]*sql.Column, len(sch))
	for i := 0; i < len(sch); i++ {
		column := sch[i]
		key := strings.ToLower(column.Name)
		byName[key] = column
	}
	return byName
}
12 changes: 8 additions & 4 deletions server/analyzer/init.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ const (
ruleId_AssignUpdateCasts
ruleId_ReplaceIndexedTables
ruleId_ReplaceSerial
ruleId_AddImplicitPrefixLengths
ruleId_InsertContextRootFinalizer
)

Expand All @@ -41,11 +42,14 @@ func Init() {
)

// Column default validation was moved to occur after type sanitization, so we'll remove it from its original place
analyzer.OnceBeforeDefault = removeAnalyzerRules(analyzer.OnceBeforeDefault,
analyzer.ValidateColumnDefaultsId)
analyzer.OnceBeforeDefault = removeAnalyzerRules(analyzer.OnceBeforeDefault, analyzer.ValidateColumnDefaultsId)

// PostgreSQL doesn't have the concept of prefix lengths, so we add a rule to implicitly add them
analyzer.OnceBeforeDefault = append([]analyzer.Rule{{Id: ruleId_AddImplicitPrefixLengths, Apply: AddImplicitPrefixLengths}},
analyzer.OnceBeforeDefault...)

// Remove all other validation rules that do not apply to Postgres
analyzer.DefaultValidationRules = removeAnalyzerRules(analyzer.DefaultValidationRules,
analyzer.ValidateOperandsId)
analyzer.DefaultValidationRules = removeAnalyzerRules(analyzer.DefaultValidationRules, analyzer.ValidateOperandsId)

analyzer.OnceAfterDefault = append(analyzer.OnceAfterDefault,
analyzer.Rule{Id: ruleId_ReplaceSerial, Apply: ReplaceSerial},
Expand Down
16 changes: 4 additions & 12 deletions testing/bats/dataloading/french-towns-communes-francaises.sql
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,7 @@ CREATE TABLE Regions (
id SERIAL UNIQUE NOT NULL,
code VARCHAR(4) UNIQUE NOT NULL,
capital VARCHAR(10) NOT NULL, -- REFERENCES Towns (code),
-- TODO: TEXT columns do not work correctly in Doltgres yet
-- name TEXT UNIQUE NOT NULL
name VARCHAR(255) UNIQUE NOT NULL
name TEXT UNIQUE NOT NULL
);

-- Departments / Départements
Expand All @@ -31,21 +29,15 @@ CREATE TABLE Departments (
capital VARCHAR(10) UNIQUE NOT NULL, -- REFERENCES Towns (code),
-- Actually, it is the concatenation of D.code + T.code.
region VARCHAR(4) NOT NULL REFERENCES Regions (code),
-- TODO: TEXT columns do not work correctly in Doltgres yet
-- name TEXT UNIQUE NOT NULL
name VARCHAR(255) UNIQUE NOT NULL
name TEXT UNIQUE NOT NULL
);

-- Towns / Communes
CREATE TABLE Towns (
id SERIAL UNIQUE NOT NULL,
code VARCHAR(10) NOT NULL, -- Only unique inside a department
-- TODO: TEXT columns do not work correctly in Doltgres yet
-- article TEXT,
article VARCHAR(255),
-- TODO: TEXT columns do not work correctly in Doltgres yet
-- name TEXT NOT NULL, -- Names are not really unique, for instance 'Sainte-Croix'
name VARCHAR(255) NOT NULL, -- Names are not really unique, for instance 'Sainte-Croix'
article TEXT,
name TEXT NOT NULL, -- Names are not really unique, for instance 'Sainte-Croix'
department VARCHAR(4) NOT NULL REFERENCES Departments (code),
UNIQUE (code, department)
-- UNIQUE (name, department) -- Not perfectly unique but almost
Expand Down
92 changes: 88 additions & 4 deletions testing/go/types_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2111,11 +2111,27 @@ var typesTests = []ScriptTest{
{
Name: "Text type",
SetUpScript: []string{
// Test a table with a TEXT column
"CREATE TABLE t_text (id INTEGER primary key, v1 TEXT);",
"INSERT INTO t_text VALUES (1, 'Hello'), (2, 'World'), (3, ''), (4, NULL);",

// Test a table created with a TEXT column in a unique, secondary index
"CREATE TABLE t_text_unique (id INTEGER primary key, v1 TEXT, v2 TEXT NOT NULL UNIQUE);",
"INSERT INTO t_text_unique VALUES (1, 'Hello', 'Bonjour'), (2, 'World', 'tout le monde'), (3, '', ''), (4, NULL, '!');",
},
Assertions: []ScriptTestAssertion{
{
// Use the text keyword to cast
Query: `SELECT text 'text' || ' and unknown';`,
Expected: []sql.Row{{"text and unknown"}},
},
{
// Use the text keyword to cast
Query: `SELECT text 'this is a text string' = text 'this is a text string' AS true;`,
Expected: []sql.Row{{"t"}},
},
{
// Basic select from a table with a TEXT column
Query: "SELECT * FROM t_text ORDER BY id;",
Expected: []sql.Row{
{1, "Hello"},
Expand All @@ -2125,12 +2141,80 @@ var typesTests = []ScriptTest{
},
},
{
Query: `SELECT text 'text' || ' and unknown';`,
Expected: []sql.Row{{"text and unknown"}},
// Create a unique, secondary index on a TEXT column
Query: "CREATE UNIQUE INDEX v1_unique ON t_text(v1);",
Expected: []sql.Row{},
},
{
Query: `SELECT text 'this is a text string' = text 'this is a text string' AS true;`,
Expected: []sql.Row{{"t"}},
Query: "SELECT * FROM t_text WHERE v1 = 'World';",
Expected: []sql.Row{
{2, "World"},
},
},
{
// Test the new unique constraint on the TEXT column
Query: "INSERT INTO t_text VALUES (5, 'World');",
ExpectedErr: "duplicate unique key given: [World]",
fulghum marked this conversation as resolved.
Show resolved Hide resolved
},
{
Query: "SELECT * FROM t_text_unique WHERE v2 = '!';",
Expected: []sql.Row{
{4, nil, "!"},
},
},
{
Query: "SELECT * FROM t_text_unique WHERE v2 >= '!' ORDER BY v2;",
Expected: []sql.Row{
{4, nil, "!"},
{1, "Hello", "Bonjour"},
{2, "World", "tout le monde"},
},
},
{
// Test ordering by TEXT column in a secondary index
Query: "SELECT * FROM t_text_unique ORDER BY v2;",
Expected: []sql.Row{
{3, "", ""},
{4, nil, "!"},
{1, "Hello", "Bonjour"},
{2, "World", "tout le monde"},
},
},
{
Query: "SELECT * FROM t_text_unique ORDER BY id;",
Expected: []sql.Row{
{1, "Hello", "Bonjour"},
{2, "World", "tout le monde"},
{3, "", ""},
{4, nil, "!"},
},
},
{
Query: "INSERT INTO t_text_unique VALUES (5, 'Another', 'Bonjour');",
ExpectedErr: "duplicate unique key given: [Bonjour]",
},
{
// Test that TEXT types can't be used in primary keys yet – Dolt doesn't support prefix lengths in
// primary indexes currently.
Query: "CREATE TABLE t_text_pk (id TEXT PRIMARY KEY, col1 int);",
ExpectedErr: "blob/text column 'id' used in key specification without a key length",
fulghum marked this conversation as resolved.
Show resolved Hide resolved
},
{
// Create a table with a TEXT column to test adding a non-unique, secondary index
Query: `CREATE TABLE t2 (pk int primary key, c1 TEXT);`,
Expected: []sql.Row{},
},
{
Query: `CREATE INDEX idx1 ON t2(c1);`,
Expected: []sql.Row{},
},
{
Query: `INSERT INTO t2 VALUES (1, 'one'), (2, 'two');`,
Expected: []sql.Row{},
},
{
Query: `SELECT c1 from t2 order by c1;`,
Expected: []sql.Row{{"one"}, {"two"}},
},
},
},
Expand Down
Loading