Skip to content

Commit

Permalink
[Enhancement] improve sql digest for massive compound predicates (#53207)
Browse files Browse the repository at this point in the history

Signed-off-by: Murphy <[email protected]>
(cherry picked from commit c411ac5)
  • Loading branch information
murphyatwork authored and mergify[bot] committed Nov 28, 2024
1 parent 6b649b1 commit fc7759c
Show file tree
Hide file tree
Showing 7 changed files with 128 additions and 67 deletions.
81 changes: 17 additions & 64 deletions fe/fe-core/src/main/java/com/starrocks/analysis/Expr.java
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
import com.starrocks.catalog.Function;
import com.starrocks.catalog.FunctionSet;
import com.starrocks.catalog.ScalarType;
Expand Down Expand Up @@ -82,6 +83,7 @@
import java.util.List;
import java.util.ListIterator;
import java.util.Map;
import java.util.Queue;
import java.util.stream.Collectors;

/**
Expand Down Expand Up @@ -440,63 +442,6 @@ public void analyzeNoThrow(Analyzer analyzer) {
}
}

/**
 * Splits an expression into the operands of its top-level AND chain.
 * Any boolean-valued expr (predicates, function calls, SlotRefs, ...) can be a
 * conjunct, which is why this helper is not declared on Predicate.
 *
 * @param root expression to split; may be null
 * @return the collected conjuncts; an empty list when {@code root} is null
 */
public static List<Expr> extractConjuncts(Expr root) {
    final List<Expr> collected = Lists.newArrayList();
    if (root != null) {
        extractConjunctsImpl(root, collected);
    }
    return collected;
}

/**
 * Appends the operands of the AND chain rooted at {@code root} to {@code conjuncts},
 * in left-to-right order. Any node that is not an AND CompoundPredicate (for example
 * an OR compound) is appended unchanged as a single conjunct.
 *
 * The walk uses an explicit stack instead of recursion, so the depth of the
 * predicate tree does not translate into call-stack depth.
 *
 * @param root      expression to split
 * @param conjuncts output list that receives the conjuncts
 */
private static void extractConjunctsImpl(Expr root, List<Expr> conjuncts) {
    // Used as a stack: the last element is the next node to process.
    List<Expr> pending = Lists.newArrayList();
    pending.add(root);
    while (!pending.isEmpty()) {
        Expr current = pending.remove(pending.size() - 1);
        if (!(current instanceof CompoundPredicate)) {
            conjuncts.add(current);
            continue;
        }

        CompoundPredicate cpe = (CompoundPredicate) current;
        if (!CompoundPredicate.Operator.AND.equals(cpe.getOp())) {
            conjuncts.add(current);
            continue;
        }

        // Push the right operand first so the left operand is processed first.
        pending.add(cpe.getChild(1));
        pending.add(cpe.getChild(0));
    }
}

/**
 * Collects the operands of an AND/OR predicate into a list.
 *
 * @param root predicate to flatten; may be null
 * @return the collected operands; an empty list when {@code root} is null
 */
public static List<Expr> flattenPredicate(Expr root) {
    final List<Expr> operands = Lists.newArrayList();
    if (root != null) {
        flattenPredicate(root, operands);
    }
    return operands;
}

/**
 * Appends every leaf of the AND/OR tree rooted at {@code root} to {@code children},
 * in left-to-right order. A leaf is any node that is not a CompoundPredicate with
 * operator AND or OR; such a node is appended unchanged.
 *
 * Both AND and OR nodes are expanded at every nesting level, so a nested OR
 * subtree is flattened as well. The walk uses an explicit stack instead of
 * recursion, so the depth of the predicate tree does not translate into
 * call-stack depth.
 *
 * @param root     predicate to flatten
 * @param children output list that receives the leaves
 */
private static void flattenPredicate(Expr root, List<Expr> children) {
    // Used as a stack: the last element is the next node to process.
    List<Expr> pending = Lists.newArrayList();
    pending.add(root);
    while (!pending.isEmpty()) {
        Expr current = pending.remove(pending.size() - 1);
        if (!(current instanceof CompoundPredicate)) {
            children.add(current);
            continue;
        }

        CompoundPredicate cpe = (CompoundPredicate) current;
        if (CompoundPredicate.Operator.AND.equals(cpe.getOp())
                || CompoundPredicate.Operator.OR.equals(cpe.getOp())) {
            // Push the right operand first so the left operand is processed first.
            pending.add(cpe.getChild(1));
            pending.add(cpe.getChild(0));
        } else {
            children.add(current);
        }
    }
}


/**
 * Combines the given exprs with the AND operator by delegating to
 * {@code createCompound}.
 *
 * @param conjuncts exprs to combine
 * @return whatever {@code createCompound} returns for the AND operator
 */
public static Expr compoundAnd(Collection<Expr> conjuncts) {
    final CompoundPredicate.Operator andOp = CompoundPredicate.Operator.AND;
    return createCompound(andOp, conjuncts);
}
Expand Down Expand Up @@ -1218,14 +1163,22 @@ public SlotRef unwrapSlotRef(boolean implicitOnly) {
}

public List<SlotRef> collectAllSlotRefs() {
List<SlotRef> result = Lists.newArrayList();
if (this instanceof SlotRef) {
result.add((SlotRef) this);
}
for (Expr child : children) {
result.addAll(child.collectAllSlotRefs());
return collectAllSlotRefs(false);
}

public List<SlotRef> collectAllSlotRefs(boolean distinct) {
Collection<SlotRef> result = distinct ? Sets.newHashSet() : Lists.newArrayList();
Queue<Expr> q = Lists.newLinkedList();
q.add(this);
while (!q.isEmpty()) {
Expr head = q.poll();
if (head instanceof SlotRef) {
result.add((SlotRef) head);
}
q.addAll(head.getChildren());
}
return result;

return distinct ? Lists.newArrayList(result) : (List<SlotRef>) result;
}

/**
Expand Down
4 changes: 4 additions & 0 deletions fe/fe-core/src/main/java/com/starrocks/analysis/SlotRef.java
Original file line number Diff line number Diff line change
Expand Up @@ -310,6 +310,10 @@ public String toSqlImpl() {
}
}

/**
 * Tells whether this slot counts as a column reference.
 *
 * @return true when {@code tblName} is non-null and {@code isFromLambda()} is false
 */
public boolean isColumnRef() {
    if (tblName == null) {
        return false;
    }
    return !isFromLambda();
}

@Override
public String explainImpl() {
if (label != null) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@
import com.starrocks.catalog.AggregateType;
import com.starrocks.common.AnalysisException;
import com.starrocks.common.UserException;
import com.starrocks.sql.analyzer.AnalyzerUtils;

import java.util.List;
import java.util.Map;
Expand Down Expand Up @@ -86,7 +87,7 @@ protected void initWhereExpr(Expr whereExpr, Analyzer analyzer) throws UserExcep
if (!whereExpr.getType().isBoolean()) {
throw new UserException("where statement is not a valid statement return bool");
}
addConjuncts(Expr.extractConjuncts(whereExpr));
addConjuncts(AnalyzerUtils.extractConjuncts(whereExpr));
}

protected void checkBitmapCompatibility(Analyzer analyzer, SlotDescriptor slotDesc, Expr expr)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
import com.google.common.collect.Sets;
import com.starrocks.analysis.AnalyticExpr;
import com.starrocks.analysis.CastExpr;
import com.starrocks.analysis.CompoundPredicate;
import com.starrocks.analysis.DateLiteral;
import com.starrocks.analysis.Expr;
import com.starrocks.analysis.FunctionCallExpr;
Expand Down Expand Up @@ -135,6 +136,7 @@
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.Queue;
import java.util.Set;
import java.util.stream.Collectors;

Expand Down Expand Up @@ -290,6 +292,65 @@ public static CallOperator getCallOperator(ScalarOperator operator) {
return null;
}

/**
 * Breaks an expr into the list of conjuncts it is made of.
 * Every expr that returns a boolean qualifies as a conjunct (Predicates,
 * function calls, SlotRefs, etc.), so this method lives here rather than in
 * Predicate.
 *
 * @param root expr to break apart; may be null
 * @return the gathered conjuncts, or an empty list if {@code root} is null
 */
public static List<Expr> extractConjuncts(Expr root) {
    List<Expr> gathered = Lists.newArrayList();
    if (root == null) {
        return gathered;
    }
    extractConjunctsImpl(root, gathered);
    return gathered;
}

/**
 * Appends the operands of the AND chain rooted at {@code root} to {@code conjuncts},
 * in left-to-right order. A node that is not a CompoundPredicate, or whose operator
 * is not AND, is appended unchanged as a single conjunct.
 *
 * The traversal keeps its own stack rather than recursing, so a massive AND
 * chain costs heap space instead of call-stack frames.
 *
 * @param root      expr to split
 * @param conjuncts output list that receives the conjuncts
 */
private static void extractConjunctsImpl(Expr root, List<Expr> conjuncts) {
    // Used as a stack: the last element is the next node to process.
    List<Expr> stack = Lists.newArrayList();
    stack.add(root);
    while (!stack.isEmpty()) {
        Expr node = stack.remove(stack.size() - 1);
        boolean isAnd = node instanceof CompoundPredicate
                && CompoundPredicate.Operator.AND.equals(((CompoundPredicate) node).getOp());
        if (!isAnd) {
            conjuncts.add(node);
            continue;
        }

        CompoundPredicate and = (CompoundPredicate) node;
        // Right operand goes on the stack first so the left operand is popped first.
        stack.add(and.getChild(1));
        stack.add(and.getChild(0));
    }
}

/**
 * Flattens an AND/OR tree into the list of its leaves.
 *
 * @param root root of the tree; may be null
 * @return the leaves, or an empty list if {@code root} is null
 */
public static List<Expr> flattenPredicate(Expr root) {
    final List<Expr> leaves = Lists.newArrayList();
    if (root != null) {
        flattenPredicate(root, leaves);
    }
    return leaves;
}

/**
 * Appends every leaf of the AND/OR tree rooted at {@code root} to {@code children}.
 * A leaf is any node that is not a CompoundPredicate with operator AND or OR.
 *
 * The traversal is iterative and uses a stack (not a FIFO queue) so that leaves
 * are emitted in left-to-right order: for {@code (a OR b) OR c} the result is
 * {@code a, b, c}.
 *
 * @param root     root of the tree
 * @param children output list that receives the leaves
 */
private static void flattenPredicate(Expr root, List<Expr> children) {
    // Used as a stack: the last element is the next node to process.
    List<Expr> pending = Lists.newArrayList();
    pending.add(root);
    while (!pending.isEmpty()) {
        Expr head = pending.remove(pending.size() - 1);
        if (head instanceof CompoundPredicate &&
                (((CompoundPredicate) head).getOp() == CompoundPredicate.Operator.AND ||
                        ((CompoundPredicate) head).getOp() == CompoundPredicate.Operator.OR)) {
            // Push operands right-to-left so the leftmost one is popped first.
            List<Expr> operands = Lists.newArrayList(head.getChildren());
            for (int i = operands.size() - 1; i >= 0; i--) {
                pending.add(operands.get(i));
            }
        } else {
            children.add(head);
        }
    }
}

private static class DBCollector implements AstVisitor<Void, Void> {
private final Map<String, Database> dbs;
private final ConnectContext session;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,13 @@
package com.starrocks.sql.common;

import com.google.common.base.Joiner;
import com.starrocks.analysis.CompoundPredicate;
import com.starrocks.analysis.Expr;
import com.starrocks.analysis.InPredicate;
import com.starrocks.analysis.LimitElement;
import com.starrocks.analysis.LiteralExpr;
import com.starrocks.analysis.SlotRef;
import com.starrocks.sql.analyzer.AnalyzerUtils;
import com.starrocks.sql.analyzer.AstToStringBuilder;
import com.starrocks.sql.ast.StatementBase;
import com.starrocks.sql.ast.ValuesRelation;
Expand All @@ -31,6 +35,8 @@
*/
public class SqlDigestBuilder {

private static final int MASSIVE_COMPOUND_LIMIT = 16;

/**
 * Produces the digest string of a statement by visiting it with a fresh
 * {@code SqlDigestBuilderVisitor}.
 *
 * @param statement statement to digest
 * @return the string returned by the visitor
 */
public static String build(StatementBase statement) {
    final SqlDigestBuilderVisitor visitor = new SqlDigestBuilderVisitor();
    return visitor.visit(statement);
}
Expand All @@ -50,6 +56,25 @@ public String visitInPredicate(InPredicate node, Void context) {
}
}

/**
 * Renders a compound predicate for the digest. When the flattened AND/OR tree
 * has at least {@code MASSIVE_COMPOUND_LIMIT} operands, the whole predicate is
 * replaced by a placeholder listing the sorted column references it touches;
 * otherwise the default rendering of the parent visitor is used.
 */
@Override
public String visitCompoundPredicate(CompoundPredicate node, Void context) {
    int operandCount = AnalyzerUtils.flattenPredicate(node).size();
    if (operandCount < MASSIVE_COMPOUND_LIMIT) {
        // TODO: this adds a little overhead to the top-down visit, because
        // flattenPredicate is invoked again for every nested compound predicate.
        // It would be better to eliminate this repeated work.
        return super.visitCompoundPredicate(node, context);
    }

    // Too many compounds: only record the de-duplicated slots
    List<SlotRef> distinctSlots = node.collectAllSlotRefs(true);
    String joinedColumns = distinctSlots.stream()
            .filter(SlotRef::isColumnRef)
            .map(SlotRef::toSqlImpl)
            .sorted()
            .collect(Collectors.joining(","));
    return "$massive_compounds[" + joinedColumns + "]$";
}

@Override
public String visitValues(ValuesRelation node, Void scope) {
if (node.isNullValues()) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@
import com.starrocks.qe.SessionVariable;
import com.starrocks.server.GlobalStateMgr;
import com.starrocks.sql.analyzer.AnalyzeState;
import com.starrocks.sql.analyzer.AnalyzerUtils;
import com.starrocks.sql.analyzer.ExpressionAnalyzer;
import com.starrocks.sql.analyzer.Field;
import com.starrocks.sql.analyzer.FieldId;
Expand Down Expand Up @@ -1129,7 +1130,7 @@ private Triple<ScalarOperator, OptExprBuilder, OptExprBuilder> parseJoinOnPredic
List<ColumnRefOperator> leftOutputColumns, List<ColumnRefOperator> rightOutputColumns,
ExpressionMapping expressionMapping) {
// Step1
List<Expr> exprConjuncts = Expr.extractConjuncts(node.getOnPredicate());
List<Expr> exprConjuncts = AnalyzerUtils.extractConjuncts(node.getOnPredicate());

List<ScalarOperator> scalarConjuncts = Lists.newArrayList();
Map<ScalarOperator, SubqueryOperator> allSubqueryPlaceholders = Maps.newHashMap();
Expand Down Expand Up @@ -1206,7 +1207,7 @@ private Triple<ScalarOperator, OptExprBuilder, OptExprBuilder> parseJoinOnPredic
private boolean isJoinLeftRelatedSubquery(JoinRelation node, Expr joinOnConjunct) {
List<Subquery> subqueries = Lists.newArrayList();

List<Expr> elements = Expr.flattenPredicate(joinOnConjunct);
List<Expr> elements = AnalyzerUtils.flattenPredicate(joinOnConjunct);
List<Expr> predicateWithSubquery = Lists.newArrayList();
for (Expr element : elements) {
int oldSize = subqueries.size();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,22 @@ public static void afterClass() {
"|INSERT INTO `test`.`part_t1` PARTITION (p1) VALUES(?, ?, ?)",
"insert overwrite part_t1 partition (p1) values(1,2,3) " +
"|INSERT OVERWRITE `test`.`part_t1` PARTITION (p1) VALUES(?, ?, ?)",

// massive compounds
"select * from t1 where v4=1 or v4=2 or v4=3 or v4=4 or v4=5 or v4=6 or v4=7 or v4=8 or v4=9 or v4=10 " +
"or v4=11 or v4=12 or v4=13 or v4=14 or v4=15 or v4=16 or v4=17 or v4=18 " +
"or v4=19 or v4=20| " +
"SELECT * FROM test.t1 WHERE $massive_compounds[`test`.`t1`.`v4`]$",
"select * from t1 where v4+v5=1 or v4+v5=2 or v4+v5=3 or v4=4 or v4=5 or v4=6 or v4=7 or v4=8 or v4=9 or " +
"v4=10 " +
"or v4=11 or v4=12 or v4=13 or v4=14 or v4=15 or v4=16 or v4=17 or v4=18 " +
"or v4=19 or v4=20| " +
"SELECT * FROM test.t1 WHERE $massive_compounds[`test`.`t1`.`v4`,`test`.`t1`.`v5`]$",
"select * from t1 where v5 = 123 and (v4=1 or v4=2 or v4=3 or v4=4 or v4=5 or v4=6 or v4=7 or v4=8 or " +
"v4=9 or v4=10 " +
"or v4=11 or v4=12 or v4=13 or v4=14 or v4=15 or v4=16 or v4=17 or v4=18 " +
"or v4=19 or v4=20)| " +
"SELECT * FROM test.t1 WHERE $massive_compounds[`test`.`t1`.`v4`,`test`.`t1`.`v5`]$",
})
public void testBuild(String sql, String expectedDigest) throws Exception {
StatementBase stmt = UtFrameUtils.parseStmtWithNewParser(sql, connectContext);
Expand Down

0 comments on commit fc7759c

Please sign in to comment.