diff --git a/server/analyzer/add_implicit_prefix_lengths.go b/server/analyzer/add_implicit_prefix_lengths.go new file mode 100644 index 0000000000..eed284bf5b --- /dev/null +++ b/server/analyzer/add_implicit_prefix_lengths.go @@ -0,0 +1,121 @@ +// Copyright 2024 Dolthub, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package analyzer + +import ( + "fmt" + "strings" + + "github.com/dolthub/go-mysql-server/sql" + "github.com/dolthub/go-mysql-server/sql/analyzer" + "github.com/dolthub/go-mysql-server/sql/plan" + "github.com/dolthub/go-mysql-server/sql/transform" + + pgtypes "github.com/dolthub/doltgresql/server/types" +) + +// defaultIndexPrefixLength is the index prefix length that this analyzer rule applies automatically to TEXT columns +// in secondary indexes. 768 is the limit for the prefix length in MySQL and is also enforced in Dolt/GMS, so this +// is currently the largest size we can support. +const defaultIndexPrefixLength = 768 + +// AddImplicitPrefixLengths searches the |node| tree for any nodes creating an index, and plugs in a default index +// prefix length for any TEXT columns in those new indexes. This rule is intended to be used for Postgres compatibility, +// since Postgres does not require specifying prefix lengths for TEXT columns. 
+func AddImplicitPrefixLengths(_ *sql.Context, _ *analyzer.Analyzer, node sql.Node, _ *plan.Scope, _ analyzer.RuleSelector, _ *sql.QueryFlags) (sql.Node, transform.TreeIdentity, error) {
+	var targetSchema sql.Schema
+	transform.Inspect(node, func(node sql.Node) bool {
+		if st, ok := node.(sql.SchemaTarget); ok {
+			targetSchema = st.TargetSchema().Copy()
+			return false
+		}
+		return true
+	})
+
+	// Recurse through the node tree to fill in prefix lengths. Note that some statements come in as Block nodes
+	// that contain multiple nodes, so we need to recurse through and handle all of them.
+	return transform.Node(node, func(node sql.Node) (sql.Node, transform.TreeIdentity, error) {
+		switch node := node.(type) {
+		case *plan.AddColumn:
+			// For any AddColumn nodes, we need to update the target schema with the column being added, otherwise
+			// we won't be able to find those columns if they are also being added to a secondary index.
+			var err error
+			targetSchema, err = analyzer.ValidateAddColumn(targetSchema, node)
+			if err != nil {
+				return nil, transform.SameTree, err
+			}
+
+		case *plan.CreateTable:
+			newIndexes := make([]*sql.IndexDef, len(node.Indexes()))
+			for i := range node.Indexes() {
+				copy := *node.Indexes()[i]
+				newIndexes[i] = &copy
+			}
+			indexModified := false
+			for _, index := range newIndexes {
+				targetSchema := node.TargetSchema()
+				colMap := schToColMap(targetSchema)
+				for i := range index.Columns {
+					col, ok := colMap[strings.ToLower(index.Columns[i].Name)]
+					if !ok {
+						return nil, false, fmt.Errorf("indexed column %s not found in schema", index.Columns[i].Name)
+					}
+					if _, ok := col.Type.(pgtypes.TextType); ok && index.Columns[i].Length == 0 {
+						index.Columns[i].Length = defaultIndexPrefixLength
+						indexModified = true
+					}
+				}
+			}
+			if indexModified {
+				newNode, err := node.WithIndexDefs(newIndexes)
+				return newNode, transform.NewTree, err
+			}
+
+		case *plan.AlterIndex:
+			if node.Action == plan.IndexAction_Create {
+				colMap := schToColMap(targetSchema)
+ newColumns := make([]sql.IndexColumn, len(node.Columns)) + for i := range node.Columns { + copy := node.Columns[i] + newColumns[i] = copy + } + indexModified := false + for i := range newColumns { + col, ok := colMap[strings.ToLower(newColumns[i].Name)] + if !ok { + return nil, false, fmt.Errorf("indexed column %s not found in schema", newColumns[i].Name) + } + if _, ok := col.Type.(pgtypes.TextType); ok && newColumns[i].Length == 0 { + newColumns[i].Length = defaultIndexPrefixLength + indexModified = true + } + } + if indexModified { + newNode, err := node.WithColumns(newColumns) + return newNode, transform.NewTree, err + } + } + } + return node, transform.SameTree, nil + }) +} + +func schToColMap(sch sql.Schema) map[string]*sql.Column { + colMap := make(map[string]*sql.Column, len(sch)) + for _, col := range sch { + colMap[strings.ToLower(col.Name)] = col + } + return colMap +} diff --git a/server/analyzer/init.go b/server/analyzer/init.go index 06e418303c..d7691f9d65 100644 --- a/server/analyzer/init.go +++ b/server/analyzer/init.go @@ -26,6 +26,7 @@ const ( ruleId_AssignUpdateCasts ruleId_ReplaceIndexedTables ruleId_ReplaceSerial + ruleId_AddImplicitPrefixLengths ruleId_InsertContextRootFinalizer ) @@ -41,11 +42,14 @@ func Init() { ) // Column default validation was moved to occur after type sanitization, so we'll remove it from its original place - analyzer.OnceBeforeDefault = removeAnalyzerRules(analyzer.OnceBeforeDefault, - analyzer.ValidateColumnDefaultsId) + analyzer.OnceBeforeDefault = removeAnalyzerRules(analyzer.OnceBeforeDefault, analyzer.ValidateColumnDefaultsId) + + // PostgreSQL doesn't have the concept of prefix lengths, so we add a rule to implicitly add them + analyzer.OnceBeforeDefault = append([]analyzer.Rule{{Id: ruleId_AddImplicitPrefixLengths, Apply: AddImplicitPrefixLengths}}, + analyzer.OnceBeforeDefault...) 
+ // Remove all other validation rules that do not apply to Postgres - analyzer.DefaultValidationRules = removeAnalyzerRules(analyzer.DefaultValidationRules, - analyzer.ValidateOperandsId) + analyzer.DefaultValidationRules = removeAnalyzerRules(analyzer.DefaultValidationRules, analyzer.ValidateOperandsId) analyzer.OnceAfterDefault = append(analyzer.OnceAfterDefault, analyzer.Rule{Id: ruleId_ReplaceSerial, Apply: ReplaceSerial}, diff --git a/testing/bats/dataloading/french-towns-communes-francaises.sql b/testing/bats/dataloading/french-towns-communes-francaises.sql index 097482ed97..59b5967f71 100644 --- a/testing/bats/dataloading/french-towns-communes-francaises.sql +++ b/testing/bats/dataloading/french-towns-communes-francaises.sql @@ -18,9 +18,7 @@ CREATE TABLE Regions ( id SERIAL UNIQUE NOT NULL, code VARCHAR(4) UNIQUE NOT NULL, capital VARCHAR(10) NOT NULL, -- REFERENCES Towns (code), - -- TODO: TEXT columns do not work correctly in Doltgres yet - -- name TEXT UNIQUE NOT NULL - name VARCHAR(255) UNIQUE NOT NULL + name TEXT UNIQUE NOT NULL ); -- Departments / Départements @@ -31,21 +29,15 @@ CREATE TABLE Departments ( capital VARCHAR(10) UNIQUE NOT NULL, -- REFERENCES Towns (code), -- Actually, it is the concatenation of D.code + T.code. 
region VARCHAR(4) NOT NULL REFERENCES Regions (code), - -- TODO: TEXT columns do not work correctly in Doltgres yet - -- name TEXT UNIQUE NOT NULL - name VARCHAR(255) UNIQUE NOT NULL + name TEXT UNIQUE NOT NULL ); -- Towns / Communes CREATE TABLE Towns ( id SERIAL UNIQUE NOT NULL, code VARCHAR(10) NOT NULL, -- Only unique inside a department - -- TODO: TEXT columns do not work correctly in Doltgres yet - -- article TEXT, - article VARCHAR(255), - -- TODO: TEXT columns do not work correctly in Doltgres yet - -- name TEXT NOT NULL, -- Names are not really unique, for instance 'Sainte-Croix' - name VARCHAR(255) NOT NULL, -- Names are not really unique, for instance 'Sainte-Croix' + article TEXT, + name TEXT NOT NULL, -- Names are not really unique, for instance 'Sainte-Croix' department VARCHAR(4) NOT NULL REFERENCES Departments (code), UNIQUE (code, department) -- UNIQUE (name, department) -- Not perfectly unique but almost diff --git a/testing/go/types_test.go b/testing/go/types_test.go index 5a7115f83d..27a84bc680 100644 --- a/testing/go/types_test.go +++ b/testing/go/types_test.go @@ -2111,11 +2111,27 @@ var typesTests = []ScriptTest{ { Name: "Text type", SetUpScript: []string{ + // Test a table with a TEXT column "CREATE TABLE t_text (id INTEGER primary key, v1 TEXT);", "INSERT INTO t_text VALUES (1, 'Hello'), (2, 'World'), (3, ''), (4, NULL);", + + // Test a table created with a TEXT column in a unique, secondary index + "CREATE TABLE t_text_unique (id INTEGER primary key, v1 TEXT, v2 TEXT NOT NULL UNIQUE);", + "INSERT INTO t_text_unique VALUES (1, 'Hello', 'Bonjour'), (2, 'World', 'tout le monde'), (3, '', ''), (4, NULL, '!');", }, Assertions: []ScriptTestAssertion{ { + // Use the text keyword to cast + Query: `SELECT text 'text' || ' and unknown';`, + Expected: []sql.Row{{"text and unknown"}}, + }, + { + // Use the text keyword to cast + Query: `SELECT text 'this is a text string' = text 'this is a text string' AS true;`, + Expected: []sql.Row{{"t"}}, + }, + { 
+ // Basic select from a table with a TEXT column Query: "SELECT * FROM t_text ORDER BY id;", Expected: []sql.Row{ {1, "Hello"}, @@ -2125,12 +2141,83 @@ var typesTests = []ScriptTest{ }, }, { - Query: `SELECT text 'text' || ' and unknown';`, - Expected: []sql.Row{{"text and unknown"}}, + // Create a unique, secondary index on a TEXT column + Query: "CREATE UNIQUE INDEX v1_unique ON t_text(v1);", + Expected: []sql.Row{}, }, { - Query: `SELECT text 'this is a text string' = text 'this is a text string' AS true;`, - Expected: []sql.Row{{"t"}}, + Query: "SELECT * FROM t_text WHERE v1 = 'World';", + Expected: []sql.Row{ + {2, "World"}, + }, + }, + { + // Test the new unique constraint on the TEXT column + Query: "INSERT INTO t_text VALUES (5, 'World');", + ExpectedErr: "unique", + }, + { + Query: "SELECT * FROM t_text_unique WHERE v2 = '!';", + Expected: []sql.Row{ + {4, nil, "!"}, + }, + }, + { + Query: "SELECT * FROM t_text_unique WHERE v2 >= '!' ORDER BY v2;", + Expected: []sql.Row{ + {4, nil, "!"}, + {1, "Hello", "Bonjour"}, + {2, "World", "tout le monde"}, + }, + }, + { + // Test ordering by TEXT column in a secondary index + Query: "SELECT * FROM t_text_unique ORDER BY v2;", + Expected: []sql.Row{ + {3, "", ""}, + {4, nil, "!"}, + {1, "Hello", "Bonjour"}, + {2, "World", "tout le monde"}, + }, + }, + { + Query: "SELECT * FROM t_text_unique ORDER BY id;", + Expected: []sql.Row{ + {1, "Hello", "Bonjour"}, + {2, "World", "tout le monde"}, + {3, "", ""}, + {4, nil, "!"}, + }, + }, + { + Query: "INSERT INTO t_text_unique VALUES (5, 'Another', 'Bonjour');", + ExpectedErr: "unique", + }, + { + // Create a secondary index over multiple text fields + Query: "CREATE INDEX on t_text_unique(v1, v2);", + Expected: []sql.Row{}, + }, + { + Query: "SELECT id FROM t_text_unique WHERE v1='Hello' and v2='Bonjour';", + Expected: []sql.Row{{1}}, + }, + { + // Create a table with a TEXT column to test adding a non-unique, secondary index + Query: `CREATE TABLE t2 (pk int primary key, c1 
TEXT);`, + Expected: []sql.Row{}, + }, + { + Query: `CREATE INDEX idx1 ON t2(c1);`, + Expected: []sql.Row{}, + }, + { + Query: `INSERT INTO t2 VALUES (1, 'one'), (2, 'two');`, + Expected: []sql.Row{}, + }, + { + Query: `SELECT c1 from t2 order by c1;`, + Expected: []sql.Row{{"one"}, {"two"}}, }, }, },