From d303601040ebb8e01197410b774d525e11e46e13 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Andr=C3=A9s=20Taylor?= <andres@planetscale.com>
Date: Thu, 4 Jul 2024 23:43:16 +0200
Subject: [PATCH] Improve typing during query planning (#16310)

Signed-off-by: Andres Taylor <andres@planetscale.com>
---
 .../planbuilder/operators/aggregator.go       |   4 +-
 .../vtgate/planbuilder/operators/distinct.go  |   2 +-
 .../vtgate/planbuilder/operators/ordering.go  |   2 +-
 .../planbuilder/operators/queryprojection.go  |   2 +-
 go/vt/vtgate/planbuilder/operators/route.go   |   2 +-
 .../plancontext/planning_context.go           | 109 ++++++-
 .../plancontext/planning_context_test.go      | 270 +++++++++++++++++-
 .../planbuilder/testdata/aggr_cases.json      |  26 +-
 .../planbuilder/testdata/from_cases.json      |  56 ++++
 .../testdata/postprocess_cases.json           |   8 +-
 .../planbuilder/testdata/union_cases.json     |  58 ++--
 .../testdata/unsupported_cases.json           |   5 -
 go/vt/vtgate/semantics/semantic_state.go      |  19 --
 13 files changed, 476 insertions(+), 87 deletions(-)

diff --git a/go/vt/vtgate/planbuilder/operators/aggregator.go b/go/vt/vtgate/planbuilder/operators/aggregator.go
index 9db119bcaad..fd9fca30110 100644
--- a/go/vt/vtgate/planbuilder/operators/aggregator.go
+++ b/go/vt/vtgate/planbuilder/operators/aggregator.go
@@ -379,7 +379,7 @@ func (a *Aggregator) planOffsets(ctx *plancontext.PlanningContext) Operator {
 			a.Grouping[idx].ColOffset = offset
 			gb.ColOffset = offset
 		}
-		if gb.WSOffset != -1 || !ctx.SemTable.NeedsWeightString(gb.Inner) {
+		if gb.WSOffset != -1 || !ctx.NeedsWeightString(gb.Inner) {
 			continue
 		}
 
@@ -516,7 +516,7 @@ func (a *Aggregator) pushRemainingGroupingColumnsAndWeightStrings(ctx *planconte
 			a.Grouping[idx].ColOffset = offset
 		}
 
-		if gb.WSOffset != -1 || !ctx.SemTable.NeedsWeightString(gb.Inner) {
+		if gb.WSOffset != -1 || !ctx.NeedsWeightString(gb.Inner) {
 			continue
 		}
 
diff --git a/go/vt/vtgate/planbuilder/operators/distinct.go b/go/vt/vtgate/planbuilder/operators/distinct.go
index 7807b94d491..4fd53725e10 100644
--- a/go/vt/vtgate/planbuilder/operators/distinct.go
+++ b/go/vt/vtgate/planbuilder/operators/distinct.go
@@ -50,7 +50,7 @@ func (d *Distinct) planOffsets(ctx *plancontext.PlanningContext) Operator {
 	for idx, col := range columns {
 		e := col.Expr
 		var wsCol *int
-		if ctx.SemTable.NeedsWeightString(e) {
+		if ctx.NeedsWeightString(e) {
 			offset := d.Source.AddWSColumn(ctx, idx, false)
 			wsCol = &offset
 		}
diff --git a/go/vt/vtgate/planbuilder/operators/ordering.go b/go/vt/vtgate/planbuilder/operators/ordering.go
index 5414b34fc40..94c4f3dd846 100644
--- a/go/vt/vtgate/planbuilder/operators/ordering.go
+++ b/go/vt/vtgate/planbuilder/operators/ordering.go
@@ -86,7 +86,7 @@ func (o *Ordering) planOffsets(ctx *plancontext.PlanningContext) Operator {
 		offset := o.Source.AddColumn(ctx, true, false, aeWrap(order.SimplifiedExpr))
 		o.Offset = append(o.Offset, offset)
 
-		if !ctx.SemTable.NeedsWeightString(order.SimplifiedExpr) {
+		if !ctx.NeedsWeightString(order.SimplifiedExpr) {
 			o.WOffset = append(o.WOffset, -1)
 			continue
 		}
diff --git a/go/vt/vtgate/planbuilder/operators/queryprojection.go b/go/vt/vtgate/planbuilder/operators/queryprojection.go
index 548fc5aaa0b..8ad8a6efe1e 100644
--- a/go/vt/vtgate/planbuilder/operators/queryprojection.go
+++ b/go/vt/vtgate/planbuilder/operators/queryprojection.go
@@ -89,7 +89,7 @@ type (
 )
 
 func (aggr Aggr) NeedsWeightString(ctx *plancontext.PlanningContext) bool {
-	return aggr.OpCode.NeedsComparableValues() && ctx.SemTable.NeedsWeightString(aggr.Func.GetArg())
+	return aggr.OpCode.NeedsComparableValues() && ctx.NeedsWeightString(aggr.Func.GetArg())
 }
 
 func (aggr Aggr) GetTypeCollation(ctx *plancontext.PlanningContext) evalengine.Type {
diff --git a/go/vt/vtgate/planbuilder/operators/route.go b/go/vt/vtgate/planbuilder/operators/route.go
index cc049d22753..62d6aad6a97 100644
--- a/go/vt/vtgate/planbuilder/operators/route.go
+++ b/go/vt/vtgate/planbuilder/operators/route.go
@@ -805,7 +805,7 @@ func (r *Route) planOffsets(ctx *plancontext.PlanningContext) Operator {
 			WOffset:   -1,
 			Direction: order.Inner.Direction,
 		}
-		if ctx.SemTable.NeedsWeightString(order.SimplifiedExpr) {
+		if ctx.NeedsWeightString(order.SimplifiedExpr) {
 			ws := weightStringFor(order.SimplifiedExpr)
 			offset := r.AddColumn(ctx, true, false, aeWrap(ws))
 			o.WOffset = offset
diff --git a/go/vt/vtgate/planbuilder/plancontext/planning_context.go b/go/vt/vtgate/planbuilder/plancontext/planning_context.go
index 7db192ab7f8..58be17febab 100644
--- a/go/vt/vtgate/planbuilder/plancontext/planning_context.go
+++ b/go/vt/vtgate/planbuilder/plancontext/planning_context.go
@@ -23,6 +23,7 @@ import (
 	querypb "vitess.io/vitess/go/vt/proto/query"
 	"vitess.io/vitess/go/vt/sqlparser"
 	"vitess.io/vitess/go/vt/vterrors"
+	"vitess.io/vitess/go/vt/vtgate/engine/opcode"
 	"vitess.io/vitess/go/vt/vtgate/evalengine"
 	"vitess.io/vitess/go/vt/vtgate/semantics"
 )
@@ -214,7 +215,12 @@ func (ctx *PlanningContext) RewriteDerivedTableExpression(expr sqlparser.Expr, t
 func (ctx *PlanningContext) TypeForExpr(e sqlparser.Expr) (evalengine.Type, bool) {
 	t, found := ctx.SemTable.TypeForExpr(e)
 	if !found {
-		return t, found
+		typ := ctx.calculateTypeFor(e)
+		if typ.Valid() {
+			ctx.SemTable.ExprTypes[e] = typ
+			return typ, true
+		}
+		return evalengine.NewUnknownType(), false
 	}
 	deps := ctx.SemTable.RecursiveDeps(e)
 	// If the expression is from an outer table, it should be nullable
@@ -226,6 +232,89 @@ func (ctx *PlanningContext) TypeForExpr(e sqlparser.Expr) (evalengine.Type, bool
 	return t, true
 }
 
+func (ctx *PlanningContext) calculateTypeFor(e sqlparser.Expr) evalengine.Type {
+	cfg := &evalengine.Config{
+		ResolveType: func(expr sqlparser.Expr) (evalengine.Type, bool) {
+			col, isCol := expr.(*sqlparser.ColName)
+			if !isCol {
+				return evalengine.NewUnknownType(), false
+			}
+			return ctx.SemTable.TypeForExpr(col)
+		},
+		Collation:   ctx.SemTable.Collation,
+		Environment: ctx.VSchema.Environment(),
+		ResolveColumn: func(name *sqlparser.ColName) (int, error) {
+			// We don't need to resolve the column for type calculation
+			return 0, nil
+		},
+	}
+	env := evalengine.EmptyExpressionEnv(ctx.VSchema.Environment())
+
+	// Aggregate functions need to be rewritten to their corresponding types before translation:
+	// the evaluation engine compiler doesn't handle them, so they must be replaced with Arguments
+	// before asking the compiler for the type. That rewrite is currently disabled (see the TODO below).
+
+	// TODO: put this back in when we can calculate the aggregation types correctly
+	// expr, unknown := ctx.replaceAggrWithArg(e, cfg, env)
+	// if unknown {
+	// 	return evalengine.NewUnknownType()
+	// }
+
+	translatedExpr, err := evalengine.Translate(e, cfg)
+	if err != nil {
+		return evalengine.NewUnknownType()
+	}
+
+	typ, err := env.TypeOf(translatedExpr)
+	if err != nil {
+		return evalengine.NewUnknownType()
+	}
+	return typ
+}
+
+// replaceAggrWithArg replaces aggregate functions with typed Arguments in the given expression, to
+// prepare it for the evalengine compiler; unknown is true if an aggregate's type can't be resolved.
+func (ctx *PlanningContext) replaceAggrWithArg(e sqlparser.Expr, cfg *evalengine.Config, env *evalengine.ExpressionEnv) (expr sqlparser.Expr, unknown bool) {
+	expr = sqlparser.CopyOnRewrite(e, nil, func(cursor *sqlparser.CopyOnWriteCursor) {
+		agg, ok := cursor.Node().(sqlparser.AggrFunc)
+		if !ok {
+			return
+		}
+		code, ok := opcode.SupportedAggregates[agg.AggrName()]
+		if !ok {
+			// We don't know the type of this aggregate function
+			// The type calculation will be set to unknown
+			unknown = true
+			cursor.StopTreeWalk()
+			return
+		}
+		var inputType evalengine.Type
+		if arg := agg.GetArg(); arg != nil {
+			translatedExpr, err := evalengine.Translate(arg, cfg)
+			if err != nil {
+				unknown = true
+				cursor.StopTreeWalk()
+				return
+			}
+
+			inputType, err = env.TypeOf(translatedExpr)
+			if err != nil {
+				unknown = true
+				cursor.StopTreeWalk()
+				return
+			}
+		}
+		typ := code.ResolveType(inputType, ctx.VSchema.Environment().CollationEnv())
+		cursor.Replace(&sqlparser.Argument{
+			Name:  "arg",
+			Type:  typ.Type(),
+			Size:  typ.Size(),
+			Scale: typ.Scale(),
+		})
+	}, nil).(sqlparser.Expr)
+	return expr, unknown
+}
+
 // SQLTypeForExpr returns the sql type of the given expression, with nullable set if the expression is from an outer table.
 func (ctx *PlanningContext) SQLTypeForExpr(e sqlparser.Expr) sqltypes.Type {
 	t, found := ctx.TypeForExpr(e)
@@ -235,6 +324,24 @@ func (ctx *PlanningContext) SQLTypeForExpr(e sqlparser.Expr) sqltypes.Type {
 	return t.Type()
 }
 
+func (ctx *PlanningContext) NeedsWeightString(e sqlparser.Expr) bool {
+	switch e := e.(type) {
+	case *sqlparser.WeightStringFuncExpr, *sqlparser.Literal:
+		return false
+	default:
+		typ, found := ctx.TypeForExpr(e)
+		if !found {
+			return true
+		}
+
+		if !sqltypes.IsText(typ.Type()) {
+			return false
+		}
+
+		return !ctx.VSchema.Environment().CollationEnv().IsSupported(typ.Collation())
+	}
+}
+
 func (ctx *PlanningContext) IsAggr(e sqlparser.SQLNode) bool {
 	switch node := e.(type) {
 	case sqlparser.AggrFunc:
diff --git a/go/vt/vtgate/planbuilder/plancontext/planning_context_test.go b/go/vt/vtgate/planbuilder/plancontext/planning_context_test.go
index b47286abdb2..3ab58cba724 100644
--- a/go/vt/vtgate/planbuilder/plancontext/planning_context_test.go
+++ b/go/vt/vtgate/planbuilder/plancontext/planning_context_test.go
@@ -17,6 +17,8 @@ limitations under the License.
 package plancontext
 
 import (
+	"context"
+	"fmt"
 	"testing"
 
 	"github.com/stretchr/testify/assert"
@@ -24,12 +26,81 @@ import (
 
 	"vitess.io/vitess/go/mysql/collations"
 	"vitess.io/vitess/go/sqltypes"
+	"vitess.io/vitess/go/vt/key"
+	querypb "vitess.io/vitess/go/vt/proto/query"
+	topodatapb "vitess.io/vitess/go/vt/proto/topodata"
+	vschemapb "vitess.io/vitess/go/vt/proto/vschema"
+	vtgatepb "vitess.io/vitess/go/vt/proto/vtgate"
+	"vitess.io/vitess/go/vt/vtenv"
+	"vitess.io/vitess/go/vt/vtgate/engine"
 	"vitess.io/vitess/go/vt/vtgate/evalengine"
+	"vitess.io/vitess/go/vt/vtgate/vindexes"
 
 	"vitess.io/vitess/go/vt/sqlparser"
 	"vitess.io/vitess/go/vt/vtgate/semantics"
 )
 
+func TestTyping(t *testing.T) {
+	// This test checks that PlanningContext can take an expression where only the columns are typed
+	// and return the types of its sub-expressions (aggregate and full-expression typing is disabled below).
+	// col1 is a bigint, and col2 is a varchar.
+	expr, err := sqlparser.NewTestParser().ParseExpr("sum(length(col1)) + avg(acos(col2))")
+	require.NoError(t, err)
+	semTable := semantics.EmptySemTable()
+	var sum, avg, col1, col2, length, acos sqlparser.Expr
+
+	// Walk the expression tree: record the aggregates, function calls and columns, and set the column types.
+	_ = sqlparser.Walk(func(node sqlparser.SQLNode) (kontinue bool, err error) {
+		switch node := node.(type) {
+		case *sqlparser.ColName:
+			switch node.Name.String() {
+			case "col1":
+				semTable.ExprTypes[node] = evalengine.NewType(sqltypes.Int64, collations.Unknown)
+				col1 = node
+			case "col2":
+				semTable.ExprTypes[node] = evalengine.NewType(sqltypes.VarChar, collations.Unknown)
+				col2 = node
+			}
+		case *sqlparser.FuncExpr:
+			switch node.Name.Lowered() {
+			case "length":
+				length = node
+			case "acos":
+				acos = node
+			}
+
+		case *sqlparser.Sum:
+			sum = node
+		case *sqlparser.Avg:
+			avg = node
+		}
+
+		return true, nil
+	}, expr)
+
+	ctx := createPlanContext(semTable)
+
+	expectations := map[sqlparser.Expr]sqltypes.Type{
+		// TODO: re-enable these tests once we can calculate aggregation types
+		// sum:    sqltypes.Decimal,
+		// avg:    sqltypes.Float64,
+		// expr:   sqltypes.Float64,
+		col1:   sqltypes.Int64,
+		col2:   sqltypes.VarChar,
+		length: sqltypes.Int64,
+		acos:   sqltypes.Float64,
+	}
+	fmt.Println(sum, avg, expr, acos, col1, col2)
+
+	for expr, expected := range expectations {
+		t.Run(sqlparser.String(expr), func(t *testing.T) {
+			typ, found := ctx.TypeForExpr(expr)
+			require.True(t, found)
+			require.Equal(t, expected, typ.Type())
+		})
+	}
+}
+
 func TestOuterTableNullability(t *testing.T) {
 	// Tests that columns from outer tables are nullable,
 	// even though the semantic state says that they are not nullable.
@@ -96,13 +167,202 @@ func prepareContextAndFindColumns(t *testing.T, query string) (ctx *PlanningCont
 		return false, nil
 	}, nil, expr)
 
-	ctx = &PlanningContext{
-		SemTable:          semTable,
+	ctx = createPlanContext(semTable)
+	ctx.Statement = stmt
+	ctx.OuterTables = t2
+
+	return
+}
+
+func createPlanContext(st *semantics.SemTable) *PlanningContext {
+	return &PlanningContext{
+		SemTable:          st,
 		joinPredicates:    map[sqlparser.Expr][]sqlparser.Expr{},
 		skipPredicates:    map[sqlparser.Expr]any{},
 		ReservedArguments: map[sqlparser.Expr]string{},
-		Statement:         stmt,
-		OuterTables:       t2, // t2 is the outer table.
+		VSchema:           &vschema{},
 	}
-	return
 }
+
+type vschema struct{}
+
+func (v *vschema) FindTable(tablename sqlparser.TableName) (*vindexes.Table, string, topodatapb.TabletType, key.Destination, error) {
+	// TODO implement me
+	panic("implement me")
+}
+
+func (v *vschema) FindView(name sqlparser.TableName) sqlparser.SelectStatement {
+	// TODO implement me
+	panic("implement me")
+}
+
+func (v *vschema) FindTableOrVindex(tablename sqlparser.TableName) (*vindexes.Table, vindexes.Vindex, string, topodatapb.TabletType, key.Destination, error) {
+	// TODO implement me
+	panic("implement me")
+}
+
+func (v *vschema) DefaultKeyspace() (*vindexes.Keyspace, error) {
+	// TODO implement me
+	panic("implement me")
+}
+
+func (v *vschema) TargetString() string {
+	// TODO implement me
+	panic("implement me")
+}
+
+func (v *vschema) Destination() key.Destination {
+	// TODO implement me
+	panic("implement me")
+}
+
+func (v *vschema) TabletType() topodatapb.TabletType {
+	// TODO implement me
+	panic("implement me")
+}
+
+func (v *vschema) TargetDestination(qualifier string) (key.Destination, *vindexes.Keyspace, topodatapb.TabletType, error) {
+	// TODO implement me
+	panic("implement me")
+}
+
+func (v *vschema) AnyKeyspace() (*vindexes.Keyspace, error) {
+	// TODO implement me
+	panic("implement me")
+}
+
+func (v *vschema) FirstSortedKeyspace() (*vindexes.Keyspace, error) {
+	// TODO implement me
+	panic("implement me")
+}
+
+func (v *vschema) SysVarSetEnabled() bool {
+	// TODO implement me
+	panic("implement me")
+}
+
+func (v *vschema) KeyspaceExists(keyspace string) bool {
+	// TODO implement me
+	panic("implement me")
+}
+
+func (v *vschema) AllKeyspace() ([]*vindexes.Keyspace, error) {
+	// TODO implement me
+	panic("implement me")
+}
+
+func (v *vschema) FindKeyspace(keyspace string) (*vindexes.Keyspace, error) {
+	// TODO implement me
+	panic("implement me")
+}
+
+func (v *vschema) GetSemTable() *semantics.SemTable {
+	// TODO implement me
+	panic("implement me")
+}
+
+func (v *vschema) Planner() PlannerVersion {
+	// TODO implement me
+	panic("implement me")
+}
+
+func (v *vschema) SetPlannerVersion(pv PlannerVersion) {
+	// TODO implement me
+	panic("implement me")
+}
+
+func (v *vschema) ConnCollation() collations.ID {
+	// TODO implement me
+	panic("implement me")
+}
+
+func (v *vschema) Environment() *vtenv.Environment {
+	return vtenv.NewTestEnv()
+}
+
+func (v *vschema) ErrorIfShardedF(keyspace *vindexes.Keyspace, warn, errFmt string, params ...any) error {
+	// TODO implement me
+	panic("implement me")
+}
+
+func (v *vschema) WarnUnshardedOnly(format string, params ...any) {
+	// TODO implement me
+	panic("implement me")
+}
+
+func (v *vschema) PlannerWarning(message string) {
+	// TODO implement me
+	panic("implement me")
+}
+
+func (v *vschema) ForeignKeyMode(keyspace string) (vschemapb.Keyspace_ForeignKeyMode, error) {
+	// TODO implement me
+	panic("implement me")
+}
+
+func (v *vschema) KeyspaceError(keyspace string) error {
+	// TODO implement me
+	panic("implement me")
+}
+
+func (v *vschema) GetForeignKeyChecksState() *bool {
+	// TODO implement me
+	panic("implement me")
+}
+
+func (v *vschema) GetVSchema() *vindexes.VSchema {
+	// TODO implement me
+	panic("implement me")
+}
+
+func (v *vschema) GetSrvVschema() *vschemapb.SrvVSchema {
+	// TODO implement me
+	panic("implement me")
+}
+
+func (v *vschema) FindRoutedShard(keyspace, shard string) (string, error) {
+	// TODO implement me
+	panic("implement me")
+}
+
+func (v *vschema) IsShardRoutingEnabled() bool {
+	// TODO implement me
+	panic("implement me")
+}
+
+func (v *vschema) IsViewsEnabled() bool {
+	// TODO implement me
+	panic("implement me")
+}
+
+func (v *vschema) GetUDV(name string) *querypb.BindVariable {
+	// TODO implement me
+	panic("implement me")
+}
+
+func (v *vschema) PlanPrepareStatement(ctx context.Context, query string) (*engine.Plan, sqlparser.Statement, error) {
+	// TODO implement me
+	panic("implement me")
+}
+
+func (v *vschema) ClearPrepareData(stmtName string) {
+	// TODO implement me
+	panic("implement me")
+}
+
+func (v *vschema) GetPrepareData(stmtName string) *vtgatepb.PrepareData {
+	// TODO implement me
+	panic("implement me")
+}
+
+func (v *vschema) StorePrepareData(name string, pd *vtgatepb.PrepareData) {
+	// TODO implement me
+	panic("implement me")
+}
+
+func (v *vschema) GetAggregateUDFs() []string {
+	// TODO implement me
+	panic("implement me")
+}
+
+var _ VSchema = (*vschema)(nil)
diff --git a/go/vt/vtgate/planbuilder/testdata/aggr_cases.json b/go/vt/vtgate/planbuilder/testdata/aggr_cases.json
index d51e2868f6c..b124e8f2b50 100644
--- a/go/vt/vtgate/planbuilder/testdata/aggr_cases.json
+++ b/go/vt/vtgate/planbuilder/testdata/aggr_cases.json
@@ -1675,8 +1675,7 @@
         "OperatorType": "Aggregate",
         "Variant": "Ordered",
         "Aggregates": "sum_count_star(1) AS count(*)",
-        "GroupBy": "(0|2)",
-        "ResultColumns": 2,
+        "GroupBy": "0 COLLATE latin1_swedish_ci",
         "Inputs": [
           {
             "OperatorType": "Route",
@@ -1685,9 +1684,9 @@
               "Name": "user",
               "Sharded": true
             },
-            "FieldQuery": "select lower(col1) as v, count(*), weight_string(lower(col1)) from authoritative where 1 != 1 group by lower(col1), weight_string(lower(col1))",
-            "OrderBy": "(0|2) ASC",
-            "Query": "select lower(col1) as v, count(*), weight_string(lower(col1)) from authoritative group by lower(col1), weight_string(lower(col1)) order by lower(col1) asc",
+            "FieldQuery": "select lower(col1) as v, count(*) from authoritative where 1 != 1 group by lower(col1)",
+            "OrderBy": "0 ASC COLLATE latin1_swedish_ci",
+            "Query": "select lower(col1) as v, count(*) from authoritative group by lower(col1) order by lower(col1) asc",
             "Table": "authoritative"
           }
         ]
@@ -1707,8 +1706,7 @@
         "OperatorType": "Aggregate",
         "Variant": "Ordered",
         "Aggregates": "sum_count_star(1) AS count(*)",
-        "GroupBy": "(0|2)",
-        "ResultColumns": 2,
+        "GroupBy": "0",
         "Inputs": [
           {
             "OperatorType": "Route",
@@ -1717,9 +1715,9 @@
               "Name": "user",
               "Sharded": true
             },
-            "FieldQuery": "select char_length(col1) as a, count(*), weight_string(char_length(col1)) from authoritative where 1 != 1 group by char_length(col1), weight_string(char_length(col1))",
-            "OrderBy": "(0|2) ASC",
-            "Query": "select char_length(col1) as a, count(*), weight_string(char_length(col1)) from authoritative group by char_length(col1), weight_string(char_length(col1)) order by char_length(authoritative.col1) asc",
+            "FieldQuery": "select char_length(col1) as a, count(*) from authoritative where 1 != 1 group by char_length(col1)",
+            "OrderBy": "0 ASC",
+            "Query": "select char_length(col1) as a, count(*) from authoritative group by char_length(col1) order by char_length(authoritative.col1) asc",
             "Table": "authoritative"
           }
         ]
@@ -4085,13 +4083,13 @@
       "Instructions": {
         "OperatorType": "Sort",
         "Variant": "Memory",
-        "OrderBy": "(2|3) ASC",
+        "OrderBy": "2 ASC",
         "ResultColumns": 2,
         "Inputs": [
           {
             "OperatorType": "Aggregate",
             "Variant": "Ordered",
-            "Aggregates": "sum_count_star(1) AS count(*), any_value(2) AS col + 1, any_value(3)",
+            "Aggregates": "sum_count_star(1) AS count(*), any_value(2) AS col + 1",
             "GroupBy": "0",
             "Inputs": [
               {
@@ -4101,9 +4099,9 @@
                   "Name": "user",
                   "Sharded": true
                 },
-                "FieldQuery": "select col, count(*), col + 1, weight_string(col + 1) from `user` where 1 != 1 group by col",
+                "FieldQuery": "select col, count(*), col + 1 from `user` where 1 != 1 group by col",
                 "OrderBy": "0 ASC",
-                "Query": "select col, count(*), col + 1, weight_string(col + 1) from `user` group by col order by col asc",
+                "Query": "select col, count(*), col + 1 from `user` group by col order by col asc",
                 "Table": "`user`"
               }
             ]
diff --git a/go/vt/vtgate/planbuilder/testdata/from_cases.json b/go/vt/vtgate/planbuilder/testdata/from_cases.json
index 6db17511a2a..0e540e88b27 100644
--- a/go/vt/vtgate/planbuilder/testdata/from_cases.json
+++ b/go/vt/vtgate/planbuilder/testdata/from_cases.json
@@ -2841,6 +2841,62 @@
       ]
     }
   },
+  {
+    "comment": "Hash join has to be used since we have LIMIT on both sides",
+    "query": "select id from (select id from user limit 10) u join (select user_id from user_extra limit 10) ue on u.id = ue.user_id",
+    "plan": {
+      "QueryType": "SELECT",
+      "Original": "select id from (select id from user limit 10) u join (select user_id from user_extra limit 10) ue on u.id = ue.user_id",
+      "Instructions": {
+        "OperatorType": "Join",
+        "Variant": "HashJoin",
+        "ComparisonType": "-1",
+        "JoinColumnIndexes": "-1",
+        "Predicate": "u.id = ue.user_id",
+        "TableName": "`user`_user_extra",
+        "Inputs": [
+          {
+            "OperatorType": "Limit",
+            "Count": "10",
+            "Inputs": [
+              {
+                "OperatorType": "Route",
+                "Variant": "Scatter",
+                "Keyspace": {
+                  "Name": "user",
+                  "Sharded": true
+                },
+                "FieldQuery": "select u.id from (select id from `user` where 1 != 1) as u where 1 != 1",
+                "Query": "select u.id from (select id from `user`) as u limit 10",
+                "Table": "`user`"
+              }
+            ]
+          },
+          {
+            "OperatorType": "Limit",
+            "Count": "10",
+            "Inputs": [
+              {
+                "OperatorType": "Route",
+                "Variant": "Scatter",
+                "Keyspace": {
+                  "Name": "user",
+                  "Sharded": true
+                },
+                "FieldQuery": "select ue.user_id from (select user_id from user_extra where 1 != 1) as ue where 1 != 1",
+                "Query": "select ue.user_id from (select user_id from user_extra) as ue limit 10",
+                "Table": "user_extra"
+              }
+            ]
+          }
+        ]
+      },
+      "TablesUsed": [
+        "user.user",
+        "user.user_extra"
+      ]
+    }
+  },
   {
     "comment": "alias on column from derived table. TODO: to support alias in SimpleProjection engine primitive.",
     "query": "select a as k from (select count(*) as a from user) t",
diff --git a/go/vt/vtgate/planbuilder/testdata/postprocess_cases.json b/go/vt/vtgate/planbuilder/testdata/postprocess_cases.json
index 8e2fd1e31cf..74e5229016a 100644
--- a/go/vt/vtgate/planbuilder/testdata/postprocess_cases.json
+++ b/go/vt/vtgate/planbuilder/testdata/postprocess_cases.json
@@ -2030,13 +2030,13 @@
       "Instructions": {
         "OperatorType": "Sort",
         "Variant": "Memory",
-        "OrderBy": "(1|2) ASC",
+        "OrderBy": "1 ASC",
         "ResultColumns": 1,
         "Inputs": [
           {
             "OperatorType": "Join",
             "Variant": "Join",
-            "JoinColumnIndexes": "L:0,R:0,R:1",
+            "JoinColumnIndexes": "L:0,R:0",
             "JoinVars": {
               "user_col": 1
             },
@@ -2060,8 +2060,8 @@
                   "Name": "user",
                   "Sharded": true
                 },
-                "FieldQuery": "select coalesce(:user_col /* INT16 */, user_extra.col), weight_string(coalesce(:user_col /* INT16 */, user_extra.col)) from user_extra where 1 != 1",
-                "Query": "select coalesce(:user_col /* INT16 */, user_extra.col), weight_string(coalesce(:user_col /* INT16 */, user_extra.col)) from user_extra",
+                "FieldQuery": "select coalesce(:user_col /* INT16 */, user_extra.col) from user_extra where 1 != 1",
+                "Query": "select coalesce(:user_col /* INT16 */, user_extra.col) from user_extra",
                 "Table": "user_extra"
               }
             ]
diff --git a/go/vt/vtgate/planbuilder/testdata/union_cases.json b/go/vt/vtgate/planbuilder/testdata/union_cases.json
index 49458f8c608..7feabb0a698 100644
--- a/go/vt/vtgate/planbuilder/testdata/union_cases.json
+++ b/go/vt/vtgate/planbuilder/testdata/union_cases.json
@@ -376,9 +376,8 @@
       "Instructions": {
         "OperatorType": "Distinct",
         "Collations": [
-          "(0:1)"
+          "0"
         ],
-        "ResultColumns": 1,
         "Inputs": [
           {
             "OperatorType": "Route",
@@ -387,8 +386,8 @@
               "Name": "user",
               "Sharded": true
             },
-            "FieldQuery": "select dt.c0 as id, weight_string(dt.c0) from (select id from `user` where 1 != 1 union select id from music where 1 != 1 union select 1 from dual where 1 != 1) as dt(c0) where 1 != 1",
-            "Query": "select dt.c0 as id, weight_string(dt.c0) from (select id from `user` union select id from music union select 1 from dual) as dt(c0)",
+            "FieldQuery": "select id from `user` where 1 != 1 union select id from music where 1 != 1 union select 1 from dual where 1 != 1",
+            "Query": "select id from `user` union select id from music union select 1 from dual",
             "Table": "`user`, dual, music"
           }
         ]
@@ -526,9 +525,8 @@
       "Instructions": {
         "OperatorType": "Distinct",
         "Collations": [
-          "(0:1)"
+          "0"
         ],
-        "ResultColumns": 1,
         "Inputs": [
           {
             "OperatorType": "Route",
@@ -537,8 +535,8 @@
               "Name": "user",
               "Sharded": true
             },
-            "FieldQuery": "select dt.c0 as `1`, weight_string(dt.c0) from (select 1 from dual where 1 != 1 union select null from dual where 1 != 1 union select 1.0 from dual where 1 != 1 union select '1' from dual where 1 != 1 union select 2 from dual where 1 != 1 union select 2.0 from `user` where 1 != 1) as dt(c0) where 1 != 1",
-            "Query": "select dt.c0 as `1`, weight_string(dt.c0) from (select 1 from dual union select null from dual union select 1.0 from dual union select '1' from dual union select 2 from dual union select 2.0 from `user`) as dt(c0)",
+            "FieldQuery": "select 1 from dual where 1 != 1 union select null from dual where 1 != 1 union select 1.0 from dual where 1 != 1 union select '1' from dual where 1 != 1 union select 2 from dual where 1 != 1 union select 2.0 from `user` where 1 != 1",
+            "Query": "select 1 from dual union select null from dual union select 1.0 from dual union select '1' from dual union select 2 from dual union select 2.0 from `user`",
             "Table": "`user`, dual"
           }
         ]
@@ -840,9 +838,8 @@
           {
             "OperatorType": "Distinct",
             "Collations": [
-              "(0:1)"
+              "0"
             ],
-            "ResultColumns": 1,
             "Inputs": [
               {
                 "OperatorType": "Route",
@@ -851,8 +848,8 @@
                   "Name": "user",
                   "Sharded": true
                 },
-                "FieldQuery": "select dt.c0 as id, weight_string(dt.c0) from (select id from `user` where 1 != 1 union select 3 from dual where 1 != 1) as dt(c0) where 1 != 1",
-                "Query": "select dt.c0 as id, weight_string(dt.c0) from (select id from `user` union select 3 from dual limit :__upper_limit) as dt(c0)",
+                "FieldQuery": "select id from `user` where 1 != 1 union select 3 from dual where 1 != 1",
+                "Query": "select id from `user` union select 3 from dual limit :__upper_limit",
                 "Table": "`user`, dual"
               }
             ]
@@ -1092,7 +1089,7 @@
           {
             "OperatorType": "Distinct",
             "Collations": [
-              "(0:1)",
+              "0",
               "1"
             ],
             "Inputs": [
@@ -1663,9 +1660,8 @@
       "Instructions": {
         "OperatorType": "Distinct",
         "Collations": [
-          "(0:1)"
+          "0"
         ],
-        "ResultColumns": 1,
         "Inputs": [
           {
             "OperatorType": "Route",
@@ -1674,8 +1670,8 @@
               "Name": "user",
               "Sharded": true
             },
-            "FieldQuery": "select dt.c0 as col1, weight_string(dt.c0) from (select col1 from `user` where 1 != 1 union select 3 from `user` where 1 != 1) as dt(c0) where 1 != 1",
-            "Query": "select dt.c0 as col1, weight_string(dt.c0) from (select col1 from `user` union select 3 from `user`) as dt(c0)",
+            "FieldQuery": "select col1 from `user` where 1 != 1 union select 3 from `user` where 1 != 1",
+            "Query": "select col1 from `user` union select 3 from `user`",
             "Table": "`user`"
           }
         ]
@@ -1694,9 +1690,8 @@
       "Instructions": {
         "OperatorType": "Distinct",
         "Collations": [
-          "(0:1)"
+          "0"
         ],
-        "ResultColumns": 1,
         "Inputs": [
           {
             "OperatorType": "Route",
@@ -1705,8 +1700,8 @@
               "Name": "user",
               "Sharded": true
             },
-            "FieldQuery": "select dt.c0 as `3`, weight_string(dt.c0) from (select 3 from `user` where 1 != 1 union select col1 from `user` where 1 != 1) as dt(c0) where 1 != 1",
-            "Query": "select dt.c0 as `3`, weight_string(dt.c0) from (select 3 from `user` union select col1 from `user`) as dt(c0)",
+            "FieldQuery": "select 3 from `user` where 1 != 1 union select col1 from `user` where 1 != 1",
+            "Query": "select 3 from `user` union select col1 from `user`",
             "Table": "`user`"
           }
         ]
@@ -1725,9 +1720,8 @@
       "Instructions": {
         "OperatorType": "Distinct",
         "Collations": [
-          "(0:1)"
+          "0: binary"
         ],
-        "ResultColumns": 1,
         "Inputs": [
           {
             "OperatorType": "Route",
@@ -1736,8 +1730,8 @@
               "Name": "user",
               "Sharded": true
             },
-            "FieldQuery": "select dt.c0 as `3`, weight_string(dt.c0) from (select 3 from `user` where 1 != 1 union select now() from `user` where 1 != 1) as dt(c0) where 1 != 1",
-            "Query": "select dt.c0 as `3`, weight_string(dt.c0) from (select 3 from `user` union select now() from `user`) as dt(c0)",
+            "FieldQuery": "select 3 from `user` where 1 != 1 union select now() from `user` where 1 != 1",
+            "Query": "select 3 from `user` union select now() from `user`",
             "Table": "`user`"
           }
         ]
@@ -1756,9 +1750,8 @@
       "Instructions": {
         "OperatorType": "Distinct",
         "Collations": [
-          "(0:1)"
+          "0: binary"
         ],
-        "ResultColumns": 1,
         "Inputs": [
           {
             "OperatorType": "Route",
@@ -1767,8 +1760,8 @@
               "Name": "user",
               "Sharded": true
             },
-            "FieldQuery": "select dt.c0 as `now()`, weight_string(dt.c0) from (select now() from `user` where 1 != 1 union select 3 from `user` where 1 != 1) as dt(c0) where 1 != 1",
-            "Query": "select dt.c0 as `now()`, weight_string(dt.c0) from (select now() from `user` union select 3 from `user`) as dt(c0)",
+            "FieldQuery": "select now() from `user` where 1 != 1 union select 3 from `user` where 1 != 1",
+            "Query": "select now() from `user` union select 3 from `user`",
             "Table": "`user`"
           }
         ]
@@ -1787,9 +1780,8 @@
       "Instructions": {
         "OperatorType": "Distinct",
         "Collations": [
-          "(0:1)"
+          "0"
         ],
-        "ResultColumns": 1,
         "Inputs": [
           {
             "OperatorType": "Route",
@@ -1798,8 +1790,8 @@
               "Name": "user",
               "Sharded": true
             },
-            "FieldQuery": "select dt.c0 as `now()`, weight_string(dt.c0) from (select now() from `user` where 1 != 1 union select id from `user` where 1 != 1) as dt(c0) where 1 != 1",
-            "Query": "select dt.c0 as `now()`, weight_string(dt.c0) from (select now() from `user` union select id from `user`) as dt(c0)",
+            "FieldQuery": "select now() from `user` where 1 != 1 union select id from `user` where 1 != 1",
+            "Query": "select now() from `user` union select id from `user`",
             "Table": "`user`"
           }
         ]
diff --git a/go/vt/vtgate/planbuilder/testdata/unsupported_cases.json b/go/vt/vtgate/planbuilder/testdata/unsupported_cases.json
index 38119ba936c..6f3148e602b 100644
--- a/go/vt/vtgate/planbuilder/testdata/unsupported_cases.json
+++ b/go/vt/vtgate/planbuilder/testdata/unsupported_cases.json
@@ -279,11 +279,6 @@
     "query": "select 1 from user u where u.col = 6 or exists (select 1 from user_extra ue where ue.col = u.col and u.col = ue.col2)",
     "plan": "VT12001: unsupported: unmergable subquery can not be inside complex expression"
   },
-  {
-    "comment": "this query needs better type information to be able to use the hash join",
-    "query": "select id from (select id from user limit 10) u join (select user_id from user_extra limit 10) ue on u.id = ue.user_id",
-    "plan": "VT12001: unsupported: missing type information for [u.id, ue.user_id]"
-  },
   {
     "comment": "multi-shard union",
     "query": "select 1 from music union (select id from user union all select name from unsharded)",
diff --git a/go/vt/vtgate/semantics/semantic_state.go b/go/vt/vtgate/semantics/semantic_state.go
index 0544764b04f..ac2fd9c1604 100644
--- a/go/vt/vtgate/semantics/semantic_state.go
+++ b/go/vt/vtgate/semantics/semantic_state.go
@@ -674,25 +674,6 @@ func (st *SemTable) TypeForExpr(e sqlparser.Expr) (evalengine.Type, bool) {
 	return evalengine.NewUnknownType(), false
 }
 
-// NeedsWeightString returns true if the given expression needs weight_string to do safe comparisons
-func (st *SemTable) NeedsWeightString(e sqlparser.Expr) bool {
-	switch e := e.(type) {
-	case *sqlparser.WeightStringFuncExpr, *sqlparser.Literal:
-		return false
-	default:
-		typ, found := st.ExprTypes[e]
-		if !found {
-			return true
-		}
-
-		if !sqltypes.IsText(typ.Type()) {
-			return false
-		}
-
-		return !st.collEnv.IsSupported(typ.Collation())
-	}
-}
-
 func (st *SemTable) DefaultCollation() collations.ID {
 	return st.Collation
 }