Skip to content

Commit

Permalink
feat: Support InSet expression in Comet
Browse files Browse the repository at this point in the history
  • Loading branch information
viirya committed Feb 20, 2024
1 parent 7e206e2 commit cfce736
Show file tree
Hide file tree
Showing 2 changed files with 28 additions and 15 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -1377,6 +1377,15 @@ object QueryPlanSerde extends Logging with ShimQueryPlanSerde {
case In(value, list) =>
in(value, list, inputs, false)

case InSet(value, hset) =>
val valueDataType = value.dataType
val list = hset.map { setVal =>
Literal(setVal, valueDataType)
}.toSeq
// Change `InSet` to `In` expression
// We do Spark `InSet` optimization in native (DataFusion) side.
in(value, list, inputs, false)

case Not(In(value, list)) =>
in(value, list, inputs, true)

Expand Down
34 changes: 19 additions & 15 deletions spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala
Original file line number Diff line number Diff line change
Expand Up @@ -996,23 +996,27 @@ class CometExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelper {
}
}

test("test in/not in") {
Seq(false, true).foreach { dictionary =>
withSQLConf("parquet.enable.dictionary" -> dictionary.toString) {
val table = "names"
withTable(table) {
sql(s"create table $table(id int, name varchar(20)) using parquet")
sql(
s"insert into $table values(1, 'James'), (1, 'Jones'), (2, 'Smith'), (3, 'Smith')," +
"(NULL, 'Jones'), (4, NULL)")
test("test in(set)/not in(set)") {
Seq("100", "0").foreach { inSetThreshold =>
Seq(false, true).foreach { dictionary =>
withSQLConf(
SQLConf.OPTIMIZER_INSET_CONVERSION_THRESHOLD.key -> inSetThreshold,
"parquet.enable.dictionary" -> dictionary.toString) {
val table = "names"
withTable(table) {
sql(s"create table $table(id int, name varchar(20)) using parquet")
sql(
s"insert into $table values(1, 'James'), (1, 'Jones'), (2, 'Smith'), (3, 'Smith')," +
"(NULL, 'Jones'), (4, NULL)")

checkSparkAnswerAndOperator(s"SELECT * FROM $table WHERE id in (1, 2, 4, NULL)")
checkSparkAnswerAndOperator(
s"SELECT * FROM $table WHERE name in ('Smith', 'Brown', NULL)")
checkSparkAnswerAndOperator(s"SELECT * FROM $table WHERE id in (1, 2, 4, NULL)")
checkSparkAnswerAndOperator(
s"SELECT * FROM $table WHERE name in ('Smith', 'Brown', NULL)")

// TODO: why with not in, the plan is only `LocalTableScan`?
checkSparkAnswer(s"SELECT * FROM $table WHERE id not in (1)")
checkSparkAnswer(s"SELECT * FROM $table WHERE name not in ('Smith', 'Brown', NULL)")
// TODO: why with not in, the plan is only `LocalTableScan`?
checkSparkAnswer(s"SELECT * FROM $table WHERE id not in (1)")
checkSparkAnswer(s"SELECT * FROM $table WHERE name not in ('Smith', 'Brown', NULL)")
}
}
}
}
Expand Down

0 comments on commit cfce736

Please sign in to comment.