From fc55b8a26ad0429e2eccd4bb366429913685251c Mon Sep 17 00:00:00 2001
From: Eren Avsarogullari
Date: Sat, 24 Feb 2024 20:46:01 -0800
Subject: [PATCH] New UT is integrated with the legacy API

---
 .../apache/comet/CometExpressionSuite.scala | 96 +++++++------------
 1 file changed, 36 insertions(+), 60 deletions(-)

diff --git a/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala b/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala
index 0effa9667b..facd2acc2c 100644
--- a/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala
+++ b/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala
@@ -21,15 +21,13 @@
 package org.apache.comet
 
 import java.util
 
-import scala.collection.convert.ImplicitConversions.`collection AsScalaIterable`
-
 import org.apache.hadoop.fs.Path
 import org.apache.spark.sql.{CometTestBase, DataFrame, Row}
 import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper
-import org.apache.spark.sql.functions.{col, expr}
+import org.apache.spark.sql.functions.expr
 import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.internal.SQLConf.SESSION_LOCAL_TIMEZONE
-import org.apache.spark.sql.types.{DataTypes, Decimal, DecimalType, StructType}
+import org.apache.spark.sql.types.{Decimal, DecimalType, StructType}
 
 import org.apache.comet.CometSparkSessionExtensions.{isSpark32, isSpark34Plus}
@@ -453,20 +451,20 @@ class CometExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelper {
       val view = "str_view"
       withView(view) {
         sql(s"""create temporary view $view as select c, v from values
-               | (null, null), (null, null),
-               | (null, 'S'), (null, 'S'),
-               | ('N', 'N '), ('N', 'N '),
-               | ('Ne', 'Sp'), ('Ne', 'Sp'),
-               | ('Net ', 'Spa '), ('Net ', 'Spa '),
-               | ('NetE', 'Spar'), ('NetE', 'Spar'),
-               | ('NetEa ', 'Spark '), ('NetEa ', 'Spark '),
-               | ('NetEas ', 'Spark'), ('NetEas ', 'Spark'),
-               | ('NetEase', 'Spark-'), ('NetEase', 'Spark-') t(c, v);""".stripMargin)
+          | (null, null), (null, null),
+          | (null, 'S'), (null, 'S'),
+          | ('N', 'N '), ('N', 'N '),
+          | ('Ne', 'Sp'), ('Ne', 'Sp'),
+          | ('Net ', 'Spa '), ('Net ', 'Spa '),
+          | ('NetE', 'Spar'), ('NetE', 'Spar'),
+          | ('NetEa ', 'Spark '), ('NetEa ', 'Spark '),
+          | ('NetEas ', 'Spark'), ('NetEas ', 'Spark'),
+          | ('NetEase', 'Spark-'), ('NetEase', 'Spark-') t(c, v);""".stripMargin)
         sql(
           s"create table $table(c7 char(7), c8 char(8), v varchar(6), s string) using parquet;")
         sql(s"insert into $table select c, c, v, c from $view;")
         val df = sql(s"""select substring(c7, 2), substring(c8, 2),
-                        | substring(v, 3), substring(s, 2) from $table;""".stripMargin)
+          | substring(v, 3), substring(s, 2) from $table;""".stripMargin)
         val expected = Row(" ", " ", "", "") ::
           Row(null, null, "", null) ::
           Row(null, null, null, null) ::
@@ -1155,11 +1153,11 @@ class CometExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelper {
     withTable("t1") {
       sql("CREATE TABLE t1(flag LONG, cal_dt DATE) USING PARQUET PARTITIONED BY (cal_dt)")
       sql("""
-            |INSERT INTO t1 VALUES
-            |(2, date'2021-06-27'),
-            |(2, date'2021-06-28'),
-            |(2, date'2021-06-29'),
-            |(2, date'2021-06-30')""".stripMargin)
+        |INSERT INTO t1 VALUES
+        |(2, date'2021-06-27'),
+        |(2, date'2021-06-28'),
+        |(2, date'2021-06-29'),
+        |(2, date'2021-06-30')""".stripMargin)
       checkSparkAnswerAndOperator(sql("SELECT CAST(cal_dt as STRING) FROM t1"))
       checkSparkAnswer("SHOW PARTITIONS t1")
     }
@@ -1262,6 +1260,7 @@
 
   test("Decimal random number tests") {
     val rand = scala.util.Random
+
     def makeNum(p: Int, s: Int): String = {
       val int1 = rand.nextLong()
       val int2 = rand.nextLong().abs
@@ -1306,48 +1305,25 @@
   }
 
   test("test cast utf8 to boolean as compatible with Spark") {
-    withSQLConf(
-      CometConf.COMET_ENABLED.key -> "true",
-      CometConf.COMET_EXEC_ALL_OPERATOR_ENABLED.key -> "true") {
-      withTable("test_table1", "test_table2", "test_table3", "test_table4") {
-        // Supported boolean values as true by both Arrow and Spark
-        val inputDF = Seq("t", "true", "y", "yes", "1", "T", "TrUe", "Y", "YES").toDF("c1")
-        inputDF.write.format("parquet").saveAsTable("test_table1")
-        val resultDF = this.spark
-          .table("test_table1")
-          .withColumn("converted", col("c1").cast(DataTypes.BooleanType))
-        val resultArr = resultDF.collectAsList().toList
-        resultArr.foreach(x => assert(x.get(1) == true))
-
-        // Supported boolean values as false by both Arrow and Spark
-        val inputDF2 = Seq("f", "false", "n", "no", "0", "F", "FaLSe", "N", "No").toDF("c1")
-        inputDF2.write.format("parquet").saveAsTable("test_table2")
-        val resultDF2 = this.spark
-          .table("test_table2")
-          .withColumn("converted", col("c1").cast(DataTypes.BooleanType))
-        val resultArr2 = resultDF2.collectAsList().toList
-        resultArr2.foreach(x => assert(x.get(1) == false))
-
-        // Supported boolean values by Arrow but not Spark
-        val inputDF3 =
-          Seq("TR", "FA", "tr", "tru", "ye", "on", "fa", "fal", "fals", "of", "off").toDF("c1")
-        inputDF3.write.format("parquet").saveAsTable("test_table3")
-        val resultDF3 = this.spark
-          .table("test_table3")
-          .withColumn("converted", col("c1").cast(DataTypes.BooleanType))
-        val resultArr3 = resultDF3.collectAsList().toList
-        resultArr3.foreach(x => assert(x.get(1) == null))
-
-        // Invalid boolean casting values for Arrow and Spark
-        val inputDF4 = Seq("car", "Truck").toDF("c1")
-        inputDF4.write.format("parquet").saveAsTable("test_table4")
-        val resultDF4 = this.spark
-          .table("test_table4")
-          .withColumn("converted", col("c1").cast(DataTypes.BooleanType))
-        val resultArr4 = resultDF4.collectAsList().toList
-        resultArr4.foreach(x => assert(x.get(1) == null))
+    def testConvertedColumn(inputValues: Seq[String]): Unit = {
+      val table = "test_table"
+      withTable(table) {
+        val values = inputValues.map(x => s"('$x')").mkString(",")
+        sql(s"create table $table(base_column char(20)) using parquet")
+        sql(s"insert into $table values $values")
+        checkSparkAnswerAndOperator(
+          s"select base_column, cast(base_column as boolean) as converted_column from $table")
       }
     }
-  }
+    // Supported boolean values as true by both Arrow and Spark
+    testConvertedColumn(inputValues = Seq("t", "true", "y", "yes", "1", "T", "TrUe", "Y", "YES"))
+    // Supported boolean values as false by both Arrow and Spark
+    testConvertedColumn(inputValues = Seq("f", "false", "n", "no", "0", "F", "FaLSe", "N", "No"))
+    // Supported boolean values by Arrow but not Spark
+    testConvertedColumn(inputValues =
+      Seq("TR", "FA", "tr", "tru", "ye", "on", "fa", "fal", "fals", "of", "off"))
+    // Invalid boolean casting values for Arrow and Spark
+    testConvertedColumn(inputValues = Seq("car", "Truck"))
+  }
 
 }
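
Note (not part of the patch): the refactored test drives everything through SQL strings and delegates result comparison to checkSparkAnswerAndOperator, which is why col, DataTypes, and collectAsList are no longer needed. Below is a minimal standalone sketch of the SQL the new testConvertedColumn helper builds, assuming only the string interpolation visible in the final hunk; it is plain Scala, needs no Spark session, and the object name is invented for illustration.

object ConvertedColumnSqlSketch extends App {
  // Mirrors the helper in the patch: one quoted tuple per input value.
  val table = "test_table"
  val inputValues = Seq("t", "TrUe", "car")
  val values = inputValues.map(x => s"('$x')").mkString(",")

  // Prints: insert into test_table values ('t'),('TrUe'),('car')
  println(s"insert into $table values $values")

  // Prints the query handed to checkSparkAnswerAndOperator:
  //   select base_column, cast(base_column as boolean) as converted_column from test_table
  println(s"select base_column, cast(base_column as boolean) as converted_column from $table")
}

Input values containing a single quote would break this unescaped interpolation, but the test only feeds the helper simple alphanumeric literals, so the sketch keeps the same form as the patch.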