Skip to content

Commit

Permalink
New UT is integrated with the legacy API
Browse files Browse the repository at this point in the history
  • Loading branch information
erenavsarogullari committed Feb 25, 2024
1 parent 2cdf0c0 commit fc55b8a
Showing 1 changed file with 36 additions and 60 deletions.
96 changes: 36 additions & 60 deletions spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala
Original file line number Diff line number Diff line change
Expand Up @@ -21,15 +21,13 @@ package org.apache.comet

import java.util

import scala.collection.convert.ImplicitConversions.`collection AsScalaIterable`

import org.apache.hadoop.fs.Path
import org.apache.spark.sql.{CometTestBase, DataFrame, Row}
import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper
import org.apache.spark.sql.functions.{col, expr}
import org.apache.spark.sql.functions.expr
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.internal.SQLConf.SESSION_LOCAL_TIMEZONE
import org.apache.spark.sql.types.{DataTypes, Decimal, DecimalType, StructType}
import org.apache.spark.sql.types.{Decimal, DecimalType, StructType}

import org.apache.comet.CometSparkSessionExtensions.{isSpark32, isSpark34Plus}

Expand Down Expand Up @@ -453,20 +451,20 @@ class CometExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelper {
val view = "str_view"
withView(view) {
sql(s"""create temporary view $view as select c, v from values
| (null, null), (null, null),
| (null, 'S'), (null, 'S'),
| ('N', 'N '), ('N', 'N '),
| ('Ne', 'Sp'), ('Ne', 'Sp'),
| ('Net ', 'Spa '), ('Net ', 'Spa '),
| ('NetE', 'Spar'), ('NetE', 'Spar'),
| ('NetEa ', 'Spark '), ('NetEa ', 'Spark '),
| ('NetEas ', 'Spark'), ('NetEas ', 'Spark'),
| ('NetEase', 'Spark-'), ('NetEase', 'Spark-') t(c, v);""".stripMargin)
| (null, null), (null, null),
| (null, 'S'), (null, 'S'),
| ('N', 'N '), ('N', 'N '),
| ('Ne', 'Sp'), ('Ne', 'Sp'),
| ('Net ', 'Spa '), ('Net ', 'Spa '),
| ('NetE', 'Spar'), ('NetE', 'Spar'),
| ('NetEa ', 'Spark '), ('NetEa ', 'Spark '),
| ('NetEas ', 'Spark'), ('NetEas ', 'Spark'),
| ('NetEase', 'Spark-'), ('NetEase', 'Spark-') t(c, v);""".stripMargin)
sql(
s"create table $table(c7 char(7), c8 char(8), v varchar(6), s string) using parquet;")
sql(s"insert into $table select c, c, v, c from $view;")
val df = sql(s"""select substring(c7, 2), substring(c8, 2),
| substring(v, 3), substring(s, 2) from $table;""".stripMargin)
| substring(v, 3), substring(s, 2) from $table;""".stripMargin)

val expected = Row(" ", " ", "", "") ::
Row(null, null, "", null) :: Row(null, null, null, null) ::
Expand Down Expand Up @@ -1155,11 +1153,11 @@ class CometExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelper {
withTable("t1") {
sql("CREATE TABLE t1(flag LONG, cal_dt DATE) USING PARQUET PARTITIONED BY (cal_dt)")
sql("""
|INSERT INTO t1 VALUES
|(2, date'2021-06-27'),
|(2, date'2021-06-28'),
|(2, date'2021-06-29'),
|(2, date'2021-06-30')""".stripMargin)
|INSERT INTO t1 VALUES
|(2, date'2021-06-27'),
|(2, date'2021-06-28'),
|(2, date'2021-06-29'),
|(2, date'2021-06-30')""".stripMargin)
checkSparkAnswerAndOperator(sql("SELECT CAST(cal_dt as STRING) FROM t1"))
checkSparkAnswer("SHOW PARTITIONS t1")
}
Expand Down Expand Up @@ -1262,6 +1260,7 @@ class CometExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelper {

test("Decimal random number tests") {
val rand = scala.util.Random

def makeNum(p: Int, s: Int): String = {
val int1 = rand.nextLong()
val int2 = rand.nextLong().abs
Expand Down Expand Up @@ -1306,48 +1305,25 @@ class CometExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelper {
}

test("test cast utf8 to boolean as compatible with Spark") {
withSQLConf(
CometConf.COMET_ENABLED.key -> "true",
CometConf.COMET_EXEC_ALL_OPERATOR_ENABLED.key -> "true") {
withTable("test_table1", "test_table2", "test_table3", "test_table4") {
// Supported boolean values as true by both Arrow and Spark
val inputDF = Seq("t", "true", "y", "yes", "1", "T", "TrUe", "Y", "YES").toDF("c1")
inputDF.write.format("parquet").saveAsTable("test_table1")
val resultDF = this.spark
.table("test_table1")
.withColumn("converted", col("c1").cast(DataTypes.BooleanType))
val resultArr = resultDF.collectAsList().toList
resultArr.foreach(x => assert(x.get(1) == true))

// Supported boolean values as false by both Arrow and Spark
val inputDF2 = Seq("f", "false", "n", "no", "0", "F", "FaLSe", "N", "No").toDF("c1")
inputDF2.write.format("parquet").saveAsTable("test_table2")
val resultDF2 = this.spark
.table("test_table2")
.withColumn("converted", col("c1").cast(DataTypes.BooleanType))
val resultArr2 = resultDF2.collectAsList().toList
resultArr2.foreach(x => assert(x.get(1) == false))

// Supported boolean values by Arrow but not Spark
val inputDF3 =
Seq("TR", "FA", "tr", "tru", "ye", "on", "fa", "fal", "fals", "of", "off").toDF("c1")
inputDF3.write.format("parquet").saveAsTable("test_table3")
val resultDF3 = this.spark
.table("test_table3")
.withColumn("converted", col("c1").cast(DataTypes.BooleanType))
val resultArr3 = resultDF3.collectAsList().toList
resultArr3.foreach(x => assert(x.get(1) == null))

// Invalid boolean casting values for Arrow and Spark
val inputDF4 = Seq("car", "Truck").toDF("c1")
inputDF4.write.format("parquet").saveAsTable("test_table4")
val resultDF4 = this.spark
.table("test_table4")
.withColumn("converted", col("c1").cast(DataTypes.BooleanType))
val resultArr4 = resultDF4.collectAsList().toList
resultArr4.foreach(x => assert(x.get(1) == null))
// Helper: writes the given string literals into a Parquet-backed char(20) column,
// then verifies that `cast(... as boolean)` produces the same answer under Comet
// as under vanilla Spark, and that the query runs on the Comet operator.
def testConvertedColumn(inputValues: Seq[String]): Unit = {
val table = "test_table"
withTable(table) {
// Render each input as a quoted SQL row literal, e.g. ('true'),('no').
// NOTE(review): values are interpolated without SQL escaping — an input
// containing a single quote would break the INSERT; acceptable only because
// all call sites pass fixed, quote-free literals.
val values = inputValues.map(x => s"('$x')").mkString(",")
sql(s"create table $table(base_column char(20)) using parquet")
sql(s"insert into $table values $values")
// Compares Comet's result against Spark's for the boolean cast of each row.
checkSparkAnswerAndOperator(
s"select base_column, cast(base_column as boolean) as converted_column from $table")
}
}
}

// Supported boolean values as true by both Arrow and Spark
testConvertedColumn(inputValues = Seq("t", "true", "y", "yes", "1", "T", "TrUe", "Y", "YES"))
// Supported boolean values as false by both Arrow and Spark
testConvertedColumn(inputValues = Seq("f", "false", "n", "no", "0", "F", "FaLSe", "N", "No"))
// Supported boolean values by Arrow but not Spark
testConvertedColumn(inputValues =
Seq("TR", "FA", "tr", "tru", "ye", "on", "fa", "fal", "fals", "of", "off"))
// Invalid boolean casting values for Arrow and Spark
testConvertedColumn(inputValues = Seq("car", "Truck"))
}
}

0 comments on commit fc55b8a

Please sign in to comment.