Skip to content

Commit

Permalink
Addressed review comments
Browse files Browse the repository at this point in the history
  • Loading branch information
erenavsarogullari committed Feb 24, 2024
1 parent 4c2eecd commit 2cdf0c0
Show file tree
Hide file tree
Showing 2 changed files with 51 additions and 50 deletions.
52 changes: 50 additions & 2 deletions spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,15 @@ package org.apache.comet

import java.util

import scala.collection.convert.ImplicitConversions.`collection AsScalaIterable`

import org.apache.hadoop.fs.Path
import org.apache.spark.sql.{CometTestBase, DataFrame, Row}
import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper
import org.apache.spark.sql.functions.expr
import org.apache.spark.sql.functions.{col, expr}
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.internal.SQLConf.SESSION_LOCAL_TIMEZONE
import org.apache.spark.sql.types.{Decimal, DecimalType, StructType}
import org.apache.spark.sql.types.{DataTypes, Decimal, DecimalType, StructType}

import org.apache.comet.CometSparkSessionExtensions.{isSpark32, isSpark34Plus}

Expand Down Expand Up @@ -1302,4 +1304,50 @@ class CometExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelper {
}
}
}

test("test cast utf8 to boolean as compatible with Spark") {
  // Saves `inputs` as a single string column "c1" in the Parquet table `tableName`, reads the
  // table back, casts "c1" to boolean and asserts that every row produced `expected`
  // (None means the cast is expected to yield null).
  def checkCastToBoolean(
      tableName: String,
      inputs: Seq[String],
      expected: Option[Boolean]): Unit = {
    inputs.toDF("c1").write.format("parquet").saveAsTable(tableName)
    val rows = spark
      .table(tableName)
      .withColumn("converted", col("c1").cast(DataTypes.BooleanType))
      .collect()

    // Guard against a vacuous pass: asserting over an empty result would check nothing.
    assert(rows.length == inputs.length)

    rows.foreach { row =>
      // Column 0 is the original string, column 1 is the cast result.
      val actual = if (row.isNullAt(1)) None else Some(row.getBoolean(1))
      assert(actual == expected, s"unexpected cast result for input '${row.get(0)}'")
    }
  }

  withSQLConf(
    CometConf.COMET_ENABLED.key -> "true",
    CometConf.COMET_EXEC_ALL_OPERATOR_ENABLED.key -> "true") {
    withTable("test_table1", "test_table2", "test_table3", "test_table4") {
      // Supported boolean values as true by both Arrow and Spark
      checkCastToBoolean(
        "test_table1",
        Seq("t", "true", "y", "yes", "1", "T", "TrUe", "Y", "YES"),
        expected = Some(true))

      // Supported boolean values as false by both Arrow and Spark
      checkCastToBoolean(
        "test_table2",
        Seq("f", "false", "n", "no", "0", "F", "FaLSe", "N", "No"),
        expected = Some(false))

      // Supported boolean values by Arrow but not Spark: the cast must yield null
      checkCastToBoolean(
        "test_table3",
        Seq("TR", "FA", "tr", "tru", "ye", "on", "fa", "fal", "fals", "of", "off"),
        expected = None)

      // Invalid boolean casting values for Arrow and Spark: the cast must yield null
      checkCastToBoolean("test_table4", Seq("car", "Truck"), expected = None)
    }
  }
}

}
49 changes: 1 addition & 48 deletions spark/src/test/scala/org/apache/comet/exec/CometExecSuite.scala
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
package org.apache.comet.exec

import scala.collection.JavaConverters._
import scala.collection.convert.ImplicitConversions.`collection AsScalaIterable`
import scala.collection.mutable
import scala.util.Random

Expand All @@ -38,10 +37,9 @@ import org.apache.spark.sql.execution.{CollectLimitExec, ProjectExec, UnionExec}
import org.apache.spark.sql.execution.exchange.BroadcastExchangeExec
import org.apache.spark.sql.execution.joins.{BroadcastNestedLoopJoinExec, CartesianProductExec, SortMergeJoinExec}
import org.apache.spark.sql.execution.window.WindowExec
import org.apache.spark.sql.functions.{col, date_add, expr}
import org.apache.spark.sql.functions.{date_add, expr}
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.internal.SQLConf.SESSION_LOCAL_TIMEZONE
import org.apache.spark.sql.types.DataTypes
import org.apache.spark.unsafe.types.UTF8String

import org.apache.comet.CometConf
Expand Down Expand Up @@ -220,51 +218,6 @@ class CometExecSuite extends CometTestBase {
}
}

test("test cast utf8 to boolean as compatible with Spark") {
  // Persists `values` as column "c1" of the Parquet table `table`, reads the table back and
  // returns the value of the boolean-cast column for each row.
  def convertedColumn(table: String, values: Seq[String]) = {
    values.toDF("c1").write.format("parquet").saveAsTable(table)
    val casted = this.spark
      .table(table)
      .withColumn("converted", col("c1").cast(DataTypes.BooleanType))
    // Index 1 is the "converted" column appended after "c1".
    casted.collect().map(_.get(1))
  }

  withSQLConf(
    CometConf.COMET_ENABLED.key -> "true",
    CometConf.COMET_EXEC_ALL_OPERATOR_ENABLED.key -> "true") {
    withTable("test_table1", "test_table2", "test_table3", "test_table4") {
      // Supported boolean values as true by both Arrow and Spark
      val truthy = Seq("t", "true", "y", "yes", "1", "T", "TrUe", "Y", "YES")
      convertedColumn("test_table1", truthy).foreach(v => assert(v == true))

      // Supported boolean values as false by both Arrow and Spark
      val falsy = Seq("f", "false", "n", "no", "0", "F", "FaLSe", "N", "No")
      convertedColumn("test_table2", falsy).foreach(v => assert(v == false))

      // Supported boolean values by Arrow but not Spark
      val arrowOnly =
        Seq("TR", "FA", "tr", "tru", "ye", "on", "fa", "fal", "fals", "of", "off")
      convertedColumn("test_table3", arrowOnly).foreach(v => assert(v == null))

      // Invalid boolean casting values for Arrow and Spark
      val invalid = Seq("car", "Truck")
      convertedColumn("test_table4", invalid).foreach(v => assert(v == null))
    }
  }
}

test(
"fix: ReusedExchangeExec + CometShuffleExchangeExec under QueryStageExec " +
"should be CometRoot") {
Expand Down

0 comments on commit 2cdf0c0

Please sign in to comment.