From 03d8bb2dbca1cf928a2035bc436b834eeb0fef78 Mon Sep 17 00:00:00 2001 From: Sujith Jay Nair Date: Mon, 27 May 2024 19:32:37 -0400 Subject: [PATCH 1/3] Fall back to Spark for LIKE with custom escape character Currently, LIKE with a custom escape character produces incorrect results. --- .../org/apache/comet/serde/QueryPlanSerde.scala | 2 +- .../org/apache/comet/CometExpressionSuite.scala | 14 ++++++++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala b/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala index 6eda0547f..33e789080 100644 --- a/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala +++ b/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala @@ -981,7 +981,7 @@ object QueryPlanSerde extends Logging with ShimQueryPlanSerde { None } - case Like(left, right, _) => + case Like(left, right, escapeChar) if escapeChar == '\\' => // TODO escapeChar val leftExpr = exprToProtoInternal(left, inputs) val rightExpr = exprToProtoInternal(right, inputs) diff --git a/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala b/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala index 3683c8d44..83f6ad917 100644 --- a/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala +++ b/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala @@ -578,6 +578,20 @@ class CometExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelper { } } + test("like with custom escape") { + val table = "names" + withTable(table) { + sql(s"create table $table(id int, name varchar(20)) using parquet") + sql(s"insert into $table values(1,'James Smith')") + sql(s"insert into $table values(2,'Michael_Rose')") + sql(s"insert into $table values(3,'Robert_R_Williams')") + + // Filter column having values that include underscores + val query = sql("select id from names where name like '%$_%' escape '$'") + checkAnswer(query, Row(2) :: Row(3) ::
Nil) + } + } + test("contains") { assume(!isSpark32) From bfac08a2e8029464c1d7ec44bb46d9d33384a1ef Mon Sep 17 00:00:00 2001 From: Sujith Jay Nair Date: Tue, 28 May 2024 15:09:46 -0400 Subject: [PATCH 2/3] For custom escape character, provide user with specific info message --- .../apache/comet/serde/QueryPlanSerde.scala | 33 +++++++++++-------- .../apache/comet/CometExpressionSuite.scala | 8 +++-- 2 files changed, 25 insertions(+), 16 deletions(-) diff --git a/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala b/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala index 33e789080..6bc9d193d 100644 --- a/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala +++ b/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala @@ -981,23 +981,28 @@ object QueryPlanSerde extends Logging with ShimQueryPlanSerde { None } - case Like(left, right, escapeChar) if escapeChar == '\\' => - // TODO escapeChar - val leftExpr = exprToProtoInternal(left, inputs) - val rightExpr = exprToProtoInternal(right, inputs) + case Like(left, right, escapeChar) => + if (escapeChar == '\\') { + val leftExpr = exprToProtoInternal(left, inputs) + val rightExpr = exprToProtoInternal(right, inputs) - if (leftExpr.isDefined && rightExpr.isDefined) { - val builder = ExprOuterClass.Like.newBuilder() - builder.setLeft(leftExpr.get) - builder.setRight(rightExpr.get) + if (leftExpr.isDefined && rightExpr.isDefined) { + val builder = ExprOuterClass.Like.newBuilder() + builder.setLeft(leftExpr.get) + builder.setRight(rightExpr.get) - Some( - ExprOuterClass.Expr - .newBuilder() - .setLike(builder) - .build()) + Some( + ExprOuterClass.Expr + .newBuilder() + .setLike(builder) + .build()) + } else { + withInfo(expr, left, right) + None + } } else { - withInfo(expr, left, right) + // TODO custom escape char + withInfo(expr, s"custom escape character $escapeChar not supported in LIKE") None } diff --git a/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala 
b/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala index 83f6ad917..ff2fb53ba 100644 --- a/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala +++ b/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala @@ -587,8 +587,12 @@ class CometExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelper { sql(s"insert into $table values(3,'Robert_R_Williams')") // Filter column having values that include underscores - val query = sql("select id from names where name like '%$_%' escape '$'") - checkAnswer(query, Row(2) :: Row(3) :: Nil) + val queryDefaultEscape = sql("select id from names where name like '%\\_%'") + checkAnswer(queryDefaultEscape, Row(2) :: Row(3) :: Nil) + + val queryCustomEscape = sql("select id from names where name like '%$_%' escape '$'") + checkAnswer(queryCustomEscape, Row(2) :: Row(3) :: Nil) + } } From 496e885da9c938519822bf021505e7fcf90395b5 Mon Sep 17 00:00:00 2001 From: Sujith Jay Nair Date: Tue, 28 May 2024 17:40:36 -0400 Subject: [PATCH 3/3] Test case for default escape char with checkSparkAnswerAndOperator --- .../src/test/scala/org/apache/comet/CometExpressionSuite.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala b/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala index ff2fb53ba..426639607 100644 --- a/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala +++ b/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala @@ -588,7 +588,7 @@ class CometExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelper { // Filter column having values that include underscores val queryDefaultEscape = sql("select id from names where name like '%\\_%'") - checkAnswer(queryDefaultEscape, Row(2) :: Row(3) :: Nil) + checkSparkAnswerAndOperator(queryDefaultEscape) val queryCustomEscape = sql("select id from names where name like '%$_%' escape '$'") checkAnswer(queryCustomEscape, Row(2) :: 
Row(3) :: Nil)