From b2a1a2508618a66ca7690fcef23a8c33a1f6d5fb Mon Sep 17 00:00:00 2001 From: Murphy Date: Tue, 26 Nov 2024 16:23:04 +0800 Subject: [PATCH 1/2] improve sql digest for massive compound predicates Signed-off-by: Murphy --- .../sql/common/SqlDigestBuilder.java | 62 +++++++++++++++++++ .../sql/common/SqlDigestBuilderTest.java | 11 ++++ 2 files changed, 73 insertions(+) diff --git a/fe/fe-core/src/main/java/com/starrocks/sql/common/SqlDigestBuilder.java b/fe/fe-core/src/main/java/com/starrocks/sql/common/SqlDigestBuilder.java index fe8881174dda7..3c70285b306f1 100644 --- a/fe/fe-core/src/main/java/com/starrocks/sql/common/SqlDigestBuilder.java +++ b/fe/fe-core/src/main/java/com/starrocks/sql/common/SqlDigestBuilder.java @@ -15,15 +15,24 @@ package com.starrocks.sql.common; import com.google.common.base.Joiner; +import com.google.common.collect.Lists; +import com.google.common.collect.Sets; +import com.starrocks.analysis.CompoundPredicate; +import com.starrocks.analysis.Expr; import com.starrocks.analysis.InPredicate; import com.starrocks.analysis.LimitElement; import com.starrocks.analysis.LiteralExpr; +import com.starrocks.analysis.SlotRef; +import com.starrocks.common.Pair; import com.starrocks.sql.analyzer.AstToStringBuilder; import com.starrocks.sql.ast.StatementBase; import com.starrocks.sql.ast.ValuesRelation; import org.apache.commons.lang3.StringUtils; import java.util.List; +import java.util.Queue; +import java.util.Set; +import java.util.function.Consumer; import java.util.stream.Collectors; /** @@ -31,12 +40,45 @@ */ public class SqlDigestBuilder { + private static final int MASSIVE_COMPOUND_LIMIT = 16; + public static String build(StatementBase statement) { return new SqlDigestBuilderVisitor().visit(statement); } private static class SqlDigestBuilderVisitor extends AstToStringBuilder.AST2StringBuilderVisitor { + private Pair countCompound(CompoundPredicate node) { + int andCount = 0; + int orCount = 0; + Queue q = Lists.newLinkedList(); + q.add(node); + while (!q.isEmpty()) { + Expr head = q.poll(); + if (head instanceof CompoundPredicate) { + CompoundPredicate.Operator op = ((CompoundPredicate) head).getOp(); + if (op == CompoundPredicate.Operator.AND) { + andCount++; + } else if (op == CompoundPredicate.Operator.OR) { + orCount++; + } + } + q.addAll(head.getChildren()); + } + + return Pair.create(andCount, orCount); + } + + private void traverse(CompoundPredicate node, Consumer consumer) { + Queue q = Lists.newLinkedList(); + q.add(node); + while (!q.isEmpty()) { + Expr head = q.poll(); + consumer.accept(head); + q.addAll(head.getChildren()); + } + } + @Override public String visitInPredicate(InPredicate node, Void context) { if (!node.isLiteralChildren()) { @@ -50,6 +92,26 @@ public String visitInPredicate(InPredicate node, Void context) { } } + @Override + public String visitCompoundPredicate(CompoundPredicate node, Void context) { + Pair pair = countCompound(node); + if (pair.first + pair.second >= MASSIVE_COMPOUND_LIMIT) { + // Only record de-duplicated slots if there are too many compounds + Set exprs = Sets.newHashSet(); + traverse(node, x -> { + if (x instanceof SlotRef) { + exprs.add((SlotRef) x); + } + }); + return "$massive_compounds[" + exprs.stream().map(SlotRef::toSqlImpl) + .collect(Collectors.joining(",")) + "]$"; + } else { + // TODO: it will introduce a little bit overhead in top-down visiting, in which the countCompound is + // duplicated. it's better to eliminate this overhead + return super.visitCompoundPredicate(node, context); + } + } + @Override public String visitValues(ValuesRelation node, Void scope) { if (node.isNullValues()) { diff --git a/fe/fe-core/src/test/java/com/starrocks/sql/common/SqlDigestBuilderTest.java b/fe/fe-core/src/test/java/com/starrocks/sql/common/SqlDigestBuilderTest.java index 3495ff7afa601..b60ba9056b5b5 100644 --- a/fe/fe-core/src/test/java/com/starrocks/sql/common/SqlDigestBuilderTest.java +++ b/fe/fe-core/src/test/java/com/starrocks/sql/common/SqlDigestBuilderTest.java @@ -66,6 +66,17 @@ public static void afterClass() { "|INSERT INTO `test`.`part_t1` PARTITION (p1) VALUES(?, ?, ?)", "insert overwrite part_t1 partition (p1) values(1,2,3) " + "|INSERT OVERWRITE `test`.`part_t1` PARTITION (p1) VALUES(?, ?, ?)", + + // massive compounds + "select * from t1 where v4=1 or v4=2 or v4=3 or v4=4 or v4=5 or v4=6 or v4=7 or v4=8 or v4=9 or v4=10 " + + "or v4=11 or v4=12 or v4=13 or v4=14 or v4=15 or v4=16 or v4=17 or v4=18 " + + "or v4=19 or v4=20| " + + "SELECT * FROM test.t1 WHERE $massive_compounds[`test`.`t1`.`v4`]$", + "select * from t1 where v5 = 123 and (v4=1 or v4=2 or v4=3 or v4=4 or v4=5 or v4=6 or v4=7 or v4=8 or " + + "v4=9 or v4=10 " + + "or v4=11 or v4=12 or v4=13 or v4=14 or v4=15 or v4=16 or v4=17 or v4=18 " + + "or v4=19 or v4=20)| " + + "SELECT * FROM test.t1 WHERE $massive_compounds[`test`.`t1`.`v5`,`test`.`t1`.`v4`]$", }) public void testBuild(String sql, String expectedDigest) throws Exception { StatementBase stmt = UtFrameUtils.parseStmtWithNewParser(sql, connectContext); From 8693c26275260af1b878ded627d733d007b22490 Mon Sep 17 00:00:00 2001 From: Murphy Date: Tue, 26 Nov 2024 20:39:02 +0800 Subject: [PATCH 2/2] fix comment Signed-off-by: Murphy --- .../java/com/starrocks/analysis/Expr.java | 81 ++++--------------- .../java/com/starrocks/analysis/SlotRef.java | 4 + .../com/starrocks/planner/LoadScanNode.java | 3 +- .../starrocks/sql/analyzer/AnalyzerUtils.java | 61 ++++++++++++++ .../sql/common/SqlDigestBuilder.java | 61 +++----------- .../transformer/RelationTransformer.java | 5 +- .../sql/common/SqlDigestBuilderTest.java | 7 +- 7 files changed, 105 insertions(+), 117 deletions(-) diff --git a/fe/fe-core/src/main/java/com/starrocks/analysis/Expr.java b/fe/fe-core/src/main/java/com/starrocks/analysis/Expr.java index 62e9b680dc683..a99458482f314 100644 --- a/fe/fe-core/src/main/java/com/starrocks/analysis/Expr.java +++ b/fe/fe-core/src/main/java/com/starrocks/analysis/Expr.java @@ -40,6 +40,7 @@ import com.google.common.base.Preconditions; import com.google.common.collect.Lists; import com.google.common.collect.Maps; +import com.google.common.collect.Sets; import com.starrocks.catalog.Function; import com.starrocks.catalog.FunctionSet; import com.starrocks.catalog.ScalarType; @@ -82,6 +83,7 @@ import java.util.List; import java.util.ListIterator; import java.util.Map; +import java.util.Queue; import java.util.stream.Collectors; /** @@ -440,63 +442,6 @@ public void analyzeNoThrow(Analyzer analyzer) { } } - /** - * Gather conjuncts from this expr and return them in a list. - * A conjunct is an expr that returns a boolean, e.g., Predicates, function calls, - * SlotRefs, etc. Hence, this method is placed here and not in Predicate. - */ - public static List extractConjuncts(Expr root) { - List conjuncts = Lists.newArrayList(); - if (null == root) { - return conjuncts; - } - - extractConjunctsImpl(root, conjuncts); - return conjuncts; - } - - private static void extractConjunctsImpl(Expr root, List conjuncts) { - if (!(root instanceof CompoundPredicate)) { - conjuncts.add(root); - return; - } - - CompoundPredicate cpe = (CompoundPredicate) root; - if (!CompoundPredicate.Operator.AND.equals(cpe.getOp())) { - conjuncts.add(root); - return; - } - - extractConjunctsImpl(cpe.getChild(0), conjuncts); - extractConjunctsImpl(cpe.getChild(1), conjuncts); - } - - public static List flattenPredicate(Expr root) { - List children = Lists.newArrayList(); - if (null == root) { - return children; - } - - flattenPredicate(root, children); - return children; - } - - private static void flattenPredicate(Expr root, List children) { - if (!(root instanceof CompoundPredicate)) { - children.add(root); - return; - } - - CompoundPredicate cpe = (CompoundPredicate) root; - if (CompoundPredicate.Operator.AND.equals(cpe.getOp()) || CompoundPredicate.Operator.OR.equals(cpe.getOp())) { - extractConjunctsImpl(cpe.getChild(0), children); - extractConjunctsImpl(cpe.getChild(1), children); - } else { - children.add(root); - } - } - - public static Expr compoundAnd(Collection conjuncts) { return createCompound(CompoundPredicate.Operator.AND, conjuncts); } @@ -1218,14 +1163,22 @@ public SlotRef unwrapSlotRef(boolean implicitOnly) { } public List collectAllSlotRefs() { - List result = Lists.newArrayList(); - if (this instanceof SlotRef) { - result.add((SlotRef) this); - } - for (Expr child : children) { - result.addAll(child.collectAllSlotRefs()); + return collectAllSlotRefs(false); + } + + public List collectAllSlotRefs(boolean distinct) { + Collection result = distinct ? Sets.newHashSet() : Lists.newArrayList(); + Queue q = Lists.newLinkedList(); + q.add(this); + while (!q.isEmpty()) { + Expr head = q.poll(); + if (head instanceof SlotRef) { + result.add((SlotRef) head); + } + q.addAll(head.getChildren()); } - return result; + + return distinct ? Lists.newArrayList(result) : (List) result; } /** diff --git a/fe/fe-core/src/main/java/com/starrocks/analysis/SlotRef.java b/fe/fe-core/src/main/java/com/starrocks/analysis/SlotRef.java index 75818f784cf51..a229f7dadf1dc 100644 --- a/fe/fe-core/src/main/java/com/starrocks/analysis/SlotRef.java +++ b/fe/fe-core/src/main/java/com/starrocks/analysis/SlotRef.java @@ -310,6 +310,10 @@ public String toSqlImpl() { } } + public boolean isColumnRef() { + return tblName != null && !isFromLambda(); + } + @Override public String explainImpl() { if (label != null) { diff --git a/fe/fe-core/src/main/java/com/starrocks/planner/LoadScanNode.java b/fe/fe-core/src/main/java/com/starrocks/planner/LoadScanNode.java index d580e2014470e..2e1e5e6546d7c 100644 --- a/fe/fe-core/src/main/java/com/starrocks/planner/LoadScanNode.java +++ b/fe/fe-core/src/main/java/com/starrocks/planner/LoadScanNode.java @@ -45,6 +45,7 @@ import com.starrocks.catalog.AggregateType; import com.starrocks.common.AnalysisException; import com.starrocks.common.UserException; +import com.starrocks.sql.analyzer.AnalyzerUtils; import java.util.List; import java.util.Map; @@ -86,7 +87,7 @@ protected void initWhereExpr(Expr whereExpr, Analyzer analyzer) throws UserExcep if (!whereExpr.getType().isBoolean()) { throw new UserException("where statement is not a valid statement return bool"); } - addConjuncts(Expr.extractConjuncts(whereExpr)); + addConjuncts(AnalyzerUtils.extractConjuncts(whereExpr)); } protected void checkBitmapCompatibility(Analyzer analyzer, SlotDescriptor slotDesc, Expr expr) diff --git a/fe/fe-core/src/main/java/com/starrocks/sql/analyzer/AnalyzerUtils.java b/fe/fe-core/src/main/java/com/starrocks/sql/analyzer/AnalyzerUtils.java index 7529c36a1a5d8..22212d87814cd 100644 --- a/fe/fe-core/src/main/java/com/starrocks/sql/analyzer/AnalyzerUtils.java +++ b/fe/fe-core/src/main/java/com/starrocks/sql/analyzer/AnalyzerUtils.java @@ -29,6 +29,7 @@ import com.google.common.collect.Sets; import com.starrocks.analysis.AnalyticExpr; import com.starrocks.analysis.CastExpr; +import com.starrocks.analysis.CompoundPredicate; import com.starrocks.analysis.DateLiteral; import com.starrocks.analysis.Expr; import com.starrocks.analysis.FunctionCallExpr; @@ -135,6 +136,7 @@ import java.util.Map; import java.util.Objects; import java.util.Optional; +import java.util.Queue; import java.util.Set; import java.util.stream.Collectors; @@ -290,6 +292,65 @@ public static CallOperator getCallOperator(ScalarOperator operator) { return null; } + /** + * Gather conjuncts from this expr and return them in a list. + * A conjunct is an expr that returns a boolean, e.g., Predicates, function calls, + * SlotRefs, etc. Hence, this method is placed here and not in Predicate. + */ + public static List extractConjuncts(Expr root) { + List conjuncts = Lists.newArrayList(); + if (null == root) { + return conjuncts; + } + + extractConjunctsImpl(root, conjuncts); + return conjuncts; + } + + private static void extractConjunctsImpl(Expr root, List conjuncts) { + if (!(root instanceof CompoundPredicate)) { + conjuncts.add(root); + return; + } + + CompoundPredicate cpe = (CompoundPredicate) root; + if (!CompoundPredicate.Operator.AND.equals(cpe.getOp())) { + conjuncts.add(root); + return; + } + + extractConjunctsImpl(cpe.getChild(0), conjuncts); + extractConjunctsImpl(cpe.getChild(1), conjuncts); + } + + /** + * Flatten AND/OR tree + */ + public static List flattenPredicate(Expr root) { + List children = Lists.newArrayList(); + if (null == root) { + return children; + } + + flattenPredicate(root, children); + return children; + } + + private static void flattenPredicate(Expr root, List children) { + Queue q = Lists.newLinkedList(); + q.add(root); + while (!q.isEmpty()) { + Expr head = q.poll(); + if (head instanceof CompoundPredicate && + (((CompoundPredicate) head).getOp() == CompoundPredicate.Operator.AND || + ((CompoundPredicate) head).getOp() == CompoundPredicate.Operator.OR)) { + q.addAll(head.getChildren()); + } else { + children.add(head); + } + } + } + private static class DBCollector implements AstVisitor { private final Map dbs; private final ConnectContext session; diff --git a/fe/fe-core/src/main/java/com/starrocks/sql/common/SqlDigestBuilder.java b/fe/fe-core/src/main/java/com/starrocks/sql/common/SqlDigestBuilder.java index 3c70285b306f1..3540863f9c4bc 100644 --- a/fe/fe-core/src/main/java/com/starrocks/sql/common/SqlDigestBuilder.java +++ b/fe/fe-core/src/main/java/com/starrocks/sql/common/SqlDigestBuilder.java @@ -15,24 +15,19 @@ package com.starrocks.sql.common; import com.google.common.base.Joiner; -import com.google.common.collect.Lists; -import com.google.common.collect.Sets; import com.starrocks.analysis.CompoundPredicate; import com.starrocks.analysis.Expr; import com.starrocks.analysis.InPredicate; import com.starrocks.analysis.LimitElement; import com.starrocks.analysis.LiteralExpr; import com.starrocks.analysis.SlotRef; -import com.starrocks.common.Pair; +import com.starrocks.sql.analyzer.AnalyzerUtils; import com.starrocks.sql.analyzer.AstToStringBuilder; import com.starrocks.sql.ast.StatementBase; import com.starrocks.sql.ast.ValuesRelation; import org.apache.commons.lang3.StringUtils; import java.util.List; -import java.util.Queue; -import java.util.Set; -import java.util.function.Consumer; import java.util.stream.Collectors; /** @@ -48,37 +43,6 @@ public static String build(StatementBase statement) { private static class SqlDigestBuilderVisitor extends AstToStringBuilder.AST2StringBuilderVisitor { - private Pair countCompound(CompoundPredicate node) { - int andCount = 0; - int orCount = 0; - Queue q = Lists.newLinkedList(); - q.add(node); - while (!q.isEmpty()) { - Expr head = q.poll(); - if (head instanceof CompoundPredicate) { - CompoundPredicate.Operator op = ((CompoundPredicate) head).getOp(); - if (op == CompoundPredicate.Operator.AND) { - andCount++; - } else if (op == CompoundPredicate.Operator.OR) { - orCount++; - } - } - q.addAll(head.getChildren()); - } - - return Pair.create(andCount, orCount); - } - - private void traverse(CompoundPredicate node, Consumer consumer) { - Queue q = Lists.newLinkedList(); - q.add(node); - while (!q.isEmpty()) { - Expr head = q.poll(); - consumer.accept(head); - q.addAll(head.getChildren()); - } - } - @Override public String visitInPredicate(InPredicate node, Void context) { if (!node.isLiteralChildren()) { @@ -94,20 +58,19 @@ public String visitInPredicate(InPredicate node, Void context) { @Override public String visitCompoundPredicate(CompoundPredicate node, Void context) { - Pair pair = countCompound(node); - if (pair.first + pair.second >= MASSIVE_COMPOUND_LIMIT) { + List flatten = AnalyzerUtils.flattenPredicate(node); + if (flatten.size() >= MASSIVE_COMPOUND_LIMIT) { // Only record de-duplicated slots if there are too many compounds - Set exprs = Sets.newHashSet(); - traverse(node, x -> { - if (x instanceof SlotRef) { - exprs.add((SlotRef) x); - } - }); - return "$massive_compounds[" + exprs.stream().map(SlotRef::toSqlImpl) - .collect(Collectors.joining(",")) + "]$"; + List exprs = node.collectAllSlotRefs(true); + String sortedSlots = exprs.stream() + .filter(SlotRef::isColumnRef) + .map(SlotRef::toSqlImpl) + .sorted() + .collect(Collectors.joining(",")); + return "$massive_compounds[" + sortedSlots + "]$"; } else { - // TODO: it will introduce a little bit overhead in top-down visiting, in which the countCompound is - // duplicated. it's better to eliminate this overhead + // TODO: it will introduce a little bit overhead in top-down visiting, in which the + // flattenPredicate is duplicated revoked. it's better to eliminate this overhead return super.visitCompoundPredicate(node, context); } } diff --git a/fe/fe-core/src/main/java/com/starrocks/sql/optimizer/transformer/RelationTransformer.java b/fe/fe-core/src/main/java/com/starrocks/sql/optimizer/transformer/RelationTransformer.java index c876a43248080..ccbbaf6d386a7 100644 --- a/fe/fe-core/src/main/java/com/starrocks/sql/optimizer/transformer/RelationTransformer.java +++ b/fe/fe-core/src/main/java/com/starrocks/sql/optimizer/transformer/RelationTransformer.java @@ -48,6 +48,7 @@ import com.starrocks.qe.SessionVariable; import com.starrocks.server.GlobalStateMgr; import com.starrocks.sql.analyzer.AnalyzeState; +import com.starrocks.sql.analyzer.AnalyzerUtils; import com.starrocks.sql.analyzer.ExpressionAnalyzer; import com.starrocks.sql.analyzer.Field; import com.starrocks.sql.analyzer.FieldId; @@ -1129,7 +1130,7 @@ private Triple parseJoinOnPredic List leftOutputColumns, List rightOutputColumns, ExpressionMapping expressionMapping) { // Step1 - List exprConjuncts = Expr.extractConjuncts(node.getOnPredicate()); + List exprConjuncts = AnalyzerUtils.extractConjuncts(node.getOnPredicate()); List scalarConjuncts = Lists.newArrayList(); Map allSubqueryPlaceholders = Maps.newHashMap(); @@ -1206,7 +1207,7 @@ private Triple parseJoinOnPredic private boolean isJoinLeftRelatedSubquery(JoinRelation node, Expr joinOnConjunct) { List subqueries = Lists.newArrayList(); - List elements = Expr.flattenPredicate(joinOnConjunct); + List elements = AnalyzerUtils.flattenPredicate(joinOnConjunct); List predicateWithSubquery = Lists.newArrayList(); for (Expr element : elements) { int oldSize = subqueries.size(); diff --git a/fe/fe-core/src/test/java/com/starrocks/sql/common/SqlDigestBuilderTest.java b/fe/fe-core/src/test/java/com/starrocks/sql/common/SqlDigestBuilderTest.java index b60ba9056b5b5..4fb13fe2d73ba 100644 --- a/fe/fe-core/src/test/java/com/starrocks/sql/common/SqlDigestBuilderTest.java +++ b/fe/fe-core/src/test/java/com/starrocks/sql/common/SqlDigestBuilderTest.java @@ -72,11 +72,16 @@ public static void afterClass() { "or v4=11 or v4=12 or v4=13 or v4=14 or v4=15 or v4=16 or v4=17 or v4=18 " + "or v4=19 or v4=20| " + "SELECT * FROM test.t1 WHERE $massive_compounds[`test`.`t1`.`v4`]$", + "select * from t1 where v4+v5=1 or v4+v5=2 or v4+v5=3 or v4=4 or v4=5 or v4=6 or v4=7 or v4=8 or v4=9 or " + + "v4=10 " + + "or v4=11 or v4=12 or v4=13 or v4=14 or v4=15 or v4=16 or v4=17 or v4=18 " + + "or v4=19 or v4=20| " + + "SELECT * FROM test.t1 WHERE $massive_compounds[`test`.`t1`.`v4`,`test`.`t1`.`v5`]$", "select * from t1 where v5 = 123 and (v4=1 or v4=2 or v4=3 or v4=4 or v4=5 or v4=6 or v4=7 or v4=8 or " + "v4=9 or v4=10 " + "or v4=11 or v4=12 or v4=13 or v4=14 or v4=15 or v4=16 or v4=17 or v4=18 " + "or v4=19 or v4=20)| " + - "SELECT * FROM test.t1 WHERE $massive_compounds[`test`.`t1`.`v5`,`test`.`t1`.`v4`]$", + "SELECT * FROM test.t1 WHERE $massive_compounds[`test`.`t1`.`v4`,`test`.`t1`.`v5`]$", }) public void testBuild(String sql, String expectedDigest) throws Exception { StatementBase stmt = UtFrameUtils.parseStmtWithNewParser(sql, connectContext);