diff --git a/doc/spark_builtin_expr_coverage.txt b/doc/spark_builtin_expr_coverage.txt
index 1ffdd2e60..9083a068c 100644
--- a/doc/spark_builtin_expr_coverage.txt
+++ b/doc/spark_builtin_expr_coverage.txt
@@ -57,9 +57,9 @@
 |atanh |{FAILED, [{SELECT atanh(0);, Unsupported}]} |
 |avg |{FAILED, [{SELECT avg(col) FROM VALUES (1), (2), (3) AS tab(col);, Unsupported}]} |
 |base64 |{FAILED, [{SELECT base64('Spark SQL');, Unsupported}]} |
-|bigint |{SKIPPED, [{, No examples found in spark.sessionState.functionRegistryspar}]} |
+|bigint |{SKIPPED, [{, No examples found in spark.sessionState.functionRegistry}]} |
 |bin |{FAILED, [{SELECT bin(13);, Unsupported}]} |
-|binary |{SKIPPED, [{, No examples found in spark.sessionState.functionRegistryspar}]} |
+|binary |{SKIPPED, [{, No examples found in spark.sessionState.functionRegistry}]} |
 |bit_and |{FAILED, [{SELECT bit_and(col) FROM VALUES (3), (5) AS tab(col);, Unsupported}]} |
 |bit_count |{FAILED, [{SELECT bit_count(0);, Unsupported}]} |
 |bit_get |{FAILED, [{SELECT bit_get(11, 0);, Unsupported}]} |
@@ -68,7 +68,7 @@
 |bit_xor |{FAILED, [{SELECT bit_xor(col) FROM VALUES (3), (5) AS tab(col);, Unsupported}]} |
 |bool_and |{FAILED, [{SELECT bool_and(col) FROM VALUES (true), (true), (true) AS tab(col);, Unsupported}]} |
 |bool_or |{FAILED, [{SELECT bool_or(col) FROM VALUES (true), (false), (false) AS tab(col);, Unsupported}]} |
-|boolean |{SKIPPED, [{, No examples found in spark.sessionState.functionRegistryspar}]} |
+|boolean |{SKIPPED, [{, No examples found in spark.sessionState.functionRegistry}]} |
 |bround |{FAILED, [{SELECT bround(2.5, 0);, Unsupported}]} |
 |btrim |{PASSED, [{SELECT btrim(' SparkSQL ');, OK}]} |
 |cardinality |{FAILED, [{SELECT cardinality(array('b', 'd', 'c', 'a'));, Unsupported}]} |
@@ -108,7 +108,7 @@
 |current_timestamp |{FAILED, [{SELECT current_timestamp();, Failed on something else. Check query manually}]} |
 |current_timezone |{PASSED, [{SELECT current_timezone();, OK}]} |
 |current_user |{PASSED, [{SELECT current_user();, OK}]} |
-|date |{SKIPPED, [{, No examples found in spark.sessionState.functionRegistryspar}]} |
+|date |{SKIPPED, [{, No examples found in spark.sessionState.functionRegistry}]} |
 |date_add |{FAILED, [{SELECT date_add('2016-07-30', 1);, Unsupported}]} |
 |date_diff |{FAILED, [{SELECT date_diff('2009-07-31', '2009-07-30');, Unsupported}]} |
 |date_format |{FAILED, [{SELECT date_format('2016-04-08', 'y');, Unsupported}]} |
@@ -123,12 +123,12 @@
 |dayofmonth |{FAILED, [{SELECT dayofmonth('2009-07-30');, Unsupported}]} |
 |dayofweek |{FAILED, [{SELECT dayofweek('2009-07-30');, Unsupported}]} |
 |dayofyear |{FAILED, [{SELECT dayofyear('2016-04-09');, Unsupported}]} |
-|decimal |{SKIPPED, [{, No examples found in spark.sessionState.functionRegistryspar}]} |
+|decimal |{SKIPPED, [{, No examples found in spark.sessionState.functionRegistry}]} |
 |decode |{FAILED, [{SELECT decode(encode('abc', 'utf-8'), 'utf-8');, Unsupported}]} |
 |degrees |{FAILED, [{SELECT degrees(3.141592653589793);, Unsupported}]} |
 |dense_rank |{FAILED, [{SELECT a, b, dense_rank(b) OVER (PARTITION BY a ORDER BY b) FROM VALUES ('A1', 2), ('A1', 1), ('A2', 3), ('A1', 1) tab(a, b);, Unsupported}]} |
 |div |{FAILED, [{SELECT 3 div 2;, Unsupported}]} |
-|double |{SKIPPED, [{, No examples found in spark.sessionState.functionRegistryspar}]} |
+|double |{SKIPPED, [{, No examples found in spark.sessionState.functionRegistry}]} |
 |e |{FAILED, [{SELECT e();, Unsupported}]} |
 |element_at |{FAILED, [{SELECT element_at(array(1, 2, 3), 2);, Unsupported}]} |
 |elt |{FAILED, [{SELECT elt(1, 'scala', 'java');, Unsupported}]} |
@@ -148,7 +148,7 @@
 |first |{FAILED, [{SELECT first(col) FROM VALUES (10), (5), (20) AS tab(col);, Unsupported}]} |
 |first_value |{FAILED, [{SELECT first_value(col) FROM VALUES (10), (5), (20) AS tab(col);, Unsupported}]} |
 |flatten |{FAILED, [{SELECT flatten(array(array(1, 2), array(3, 4)));, Unsupported}]} |
-|float |{SKIPPED, [{, No examples found in spark.sessionState.functionRegistryspar}]} |
+|float |{SKIPPED, [{, No examples found in spark.sessionState.functionRegistry}]} |
 |floor |{PASSED, [{SELECT floor(-0.1);, OK}]} |
 |forall |{FAILED, [{SELECT forall(array(1, 2, 3), x -> x % 2 == 0);, Unsupported}]} |
 |format_number |{FAILED, [{SELECT format_number(12332.123456, 4);, Unsupported}]} |
@@ -179,7 +179,7 @@
 |input_file_block_start |{FAILED, [{SELECT input_file_block_start();, Unsupported}]} |
 |input_file_name |{FAILED, [{SELECT input_file_name();, Unsupported}]} |
 |instr |{PASSED, [{SELECT instr('SparkSQL', 'SQL');, OK}]} |
-|int |{SKIPPED, [{, No examples found in spark.sessionState.functionRegistryspar}]} |
+|int |{SKIPPED, [{, No examples found in spark.sessionState.functionRegistry}]} |
 |isnan |{FAILED, [{SELECT isnan(cast('NaN' as double));, Unsupported}]} |
 |isnotnull |{PASSED, [{SELECT isnotnull(1);, OK}]} |
 |isnull |{PASSED, [{SELECT isnull(1);, OK}]} |
@@ -324,7 +324,7 @@
 |size |{FAILED, [{SELECT size(array('b', 'd', 'c', 'a'));, Unsupported}]} |
 |skewness |{FAILED, [{SELECT skewness(col) FROM VALUES (-10), (-20), (100), (1000) AS tab(col);, Unsupported}]} |
 |slice |{FAILED, [{SELECT slice(array(1, 2, 3, 4), 2, 2);, Unsupported}]} |
-|smallint |{SKIPPED, [{, No examples found in spark.sessionState.functionRegistryspar}]} |
+|smallint |{SKIPPED, [{, No examples found in spark.sessionState.functionRegistry}]} |
 |some |{FAILED, [{SELECT some(col) FROM VALUES (true), (false), (false) AS tab(col);, Unsupported}]} |
 |sort_array |{FAILED, [{SELECT sort_array(array('b', 'd', null, 'c', 'a'), true);, Unsupported}]} |
 |soundex |{FAILED, [{SELECT soundex('Miller');, Unsupported}]} |
@@ -340,7 +340,7 @@
 |stddev_pop |{FAILED, [{SELECT stddev_pop(col) FROM VALUES (1), (2), (3) AS tab(col);, Unsupported}]} |
 |stddev_samp |{FAILED, [{SELECT stddev_samp(col) FROM VALUES (1), (2), (3) AS tab(col);, Unsupported}]} |
 |str_to_map |{FAILED, [{SELECT str_to_map('a:1,b:2,c:3', ',', ':');, Unsupported}]} |
-|string |{SKIPPED, [{, No examples found in spark.sessionState.functionRegistryspar}]} |
+|string |{SKIPPED, [{, No examples found in spark.sessionState.functionRegistry}]} |
 |struct |{FAILED, [{SELECT struct(1, 2, 3);, Unsupported}]} |
 |substr |{FAILED, [{SELECT substr('Spark SQL', 5);, Failed on native side}]} |
 |substring |{FAILED, [{SELECT substring('Spark SQL', 5);, Failed on native side}]} |
@@ -348,11 +348,11 @@
 |sum |{FAILED, [{SELECT sum(col) FROM VALUES (5), (10), (15) AS tab(col);, Unsupported}]} |
 |tan |{PASSED, [{SELECT tan(0);, OK}]} |
 |tanh |{FAILED, [{SELECT tanh(0);, Unsupported}]} |
-|timestamp |{SKIPPED, [{, No examples found in spark.sessionState.functionRegistryspar}]} |
+|timestamp |{SKIPPED, [{, No examples found in spark.sessionState.functionRegistry}]} |
 |timestamp_micros |{FAILED, [{SELECT timestamp_micros(1230219000123123);, Unsupported}]} |
 |timestamp_millis |{FAILED, [{SELECT timestamp_millis(1230219000123);, Unsupported}]} |
 |timestamp_seconds |{FAILED, [{SELECT timestamp_seconds(1230219000);, Unsupported}]} |
-|tinyint |{SKIPPED, [{, No examples found in spark.sessionState.functionRegistryspar}]} |
+|tinyint |{SKIPPED, [{, No examples found in spark.sessionState.functionRegistry}]} |
 |to_binary |{FAILED, [{SELECT to_binary('abc', 'utf-8');, Unsupported}]} |
 |to_char |{FAILED, [{SELECT to_char(454, '999');, Unsupported}]} |
 |to_csv |{FAILED, [{SELECT to_csv(named_struct('a', 1, 'b', 2));, Unsupported}]} |
diff --git a/doc/spark_builtin_expr_coverage_agg.txt b/doc/spark_builtin_expr_coverage_agg.txt
index c0ef6d8e1..013a84a66 100644
--- a/doc/spark_builtin_expr_coverage_agg.txt
+++ b/doc/spark_builtin_expr_coverage_agg.txt
@@ -1,9 +1,9 @@
-+-------+------------------------------------------------------------+---+
-|result |details                                                      |cnt|
-+-------+------------------------------------------------------------+---+
-|FAILED |Unsupported                                                  |282|
-|FAILED |Failed on native side                                        |16 |
-|PASSED |OK                                                           |101|
-|SKIPPED|No examples found in spark.sessionState.functionRegistryspar|12 |
-|FAILED |Failed on something else. Check query manually               |4  |
-+-------+------------------------------------------------------------+---+
++-------+--------------------------------------------------------+---+
+|result |details                                                  |cnt|
++-------+--------------------------------------------------------+---+
+|FAILED |Unsupported                                              |282|
+|FAILED |Failed on native side                                    |16 |
+|FAILED |Failed on something else. Check query manually           |4  |
+|PASSED |OK                                                       |101|
+|SKIPPED|No examples found in spark.sessionState.functionRegistry|12 |
++-------+--------------------------------------------------------+---+
diff --git a/spark/src/test/scala/org/apache/comet/CometExpressionCoverageSuite.scala b/spark/src/test/scala/org/apache/comet/CometExpressionCoverageSuite.scala
index 534ab57de..e6bf2e616 100644
--- a/spark/src/test/scala/org/apache/comet/CometExpressionCoverageSuite.scala
+++ b/spark/src/test/scala/org/apache/comet/CometExpressionCoverageSuite.scala
@@ -21,28 +21,31 @@
 package org.apache.comet
 import java.nio.charset.StandardCharsets
 import java.nio.file.{Files, Paths}
+
 import scala.collection.mutable
+
+import org.scalatest.Ignore
 import org.scalatest.exceptions.TestFailedException
+
 import org.apache.spark.sql.CometTestBase
 import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper
-import org.scalatest.Ignore
 
 /**
- * Manual test to calculate Spark builtin functions coverage support by the Comet
+ * Manual test to calculate Spark builtin expressions coverage supported by Comet
  *
  * The test will update files doc/spark_builtin_expr_coverage.txt,
  * doc/spark_builtin_expr_coverage_agg.txt
  */
-@Ignore
+//@Ignore
 class CometExpressionCoverageSuite extends CometTestBase with AdaptiveSparkPlanHelper {
 
   import testImplicits._
 
-  val rawCoverageFilePath = "doc/spark_builtin_expr_coverage.txt"
-  val aggCoverageFilePath = "doc/spark_builtin_expr_coverage_agg.txt"
+  private val rawCoverageFilePath = "doc/spark_builtin_expr_coverage.txt"
+  private val aggCoverageFilePath = "doc/spark_builtin_expr_coverage_agg.txt"
 
-  test("Test Spark builtin functions coverage") {
+  test("Test Spark builtin expressions coverage") {
     val queryPattern = """(?i)SELECT (.+?);""".r
     val valuesPattern = """(?i)FROM VALUES(.+?);""".r
     val selectPattern = """(i?)SELECT(.+?)FROM""".r
@@ -78,36 +81,44 @@ class CometExpressionCoverageSuite extends CometTestBase with AdaptiveSparkPlanH
 
                 case (Some(s), Some(v)) =>
                   testSingleLineQuery(s"select * $v", s"$s tbl")
-                case _ => sys.error(s"Query $q cannot be parsed properly")
+                case _ =>
+                  resultsMap.put(
+                    funcName,
+                    CoverageResult("FAILED", Seq((q, "Cannot parse properly"))))
              }
            } else {
              // Plain example like SELECT cos(0);
              testSingleLineQuery("select 'dummy' x", s"${q.dropRight(1)}, x from tbl")
            }
-          CoverageResult("PASSED", Seq((q, "OK")))
+          CoverageResult(CoverageResultStatus.Passed.toString, Seq((q, "OK")))
         } catch {
           case e: TestFailedException
               if e.message.getOrElse("").contains("Expected only Comet native operators") =>
-            CoverageResult("FAILED", Seq((q, "Unsupported")))
+            CoverageResult(CoverageResultStatus.Failed.toString, Seq((q, "Unsupported")))
           case e if e.getMessage.contains("CometNativeException") =>
-            CoverageResult("FAILED", Seq((q, "Failed on native side")))
+            CoverageResult(
+              CoverageResultStatus.Failed.toString,
+              Seq((q, "Failed on native side")))
          case _ =>
-            CoverageResult("FAILED", Seq((q, "Failed on something else. Check query manually")))
+            CoverageResult(
+              CoverageResultStatus.Failed.toString,
+              Seq((q, "Failed on something else. Check query manually")))
        }
        resultsMap.put(funcName, queryResult)
+
      case (funcName, List()) =>
        resultsMap.put(
          funcName,
          CoverageResult(
-            "SKIPPED",
-            Seq(("", "No examples found in spark.sessionState.functionRegistryspar"))))
+            CoverageResultStatus.Skipped.toString,
+            Seq(("", "No examples found in spark.sessionState.functionRegistry"))))
    }
 
    // TODO: convert results into HTML
    resultsMap.toSeq.toDF("name", "details").createOrReplaceTempView("t")
    val str_agg = showString(
      spark.sql(
-        "select result, d._2 as details, count(1) cnt from (select name, t.details.result, explode_outer(t.details.details) as d from t) group by 1, 2"),
+        "select result, d._2 as details, count(1) cnt from (select name, t.details.result, explode_outer(t.details.details) as d from t) group by 1, 2 order by 1"),
      1000,
      0)
    Files.write(Paths.get(aggCoverageFilePath), str_agg.getBytes(StandardCharsets.UTF_8))
@@ -118,3 +129,11 @@ class CometExpressionCoverageSuite extends CometTestBase with AdaptiveSparkPlanH
 }
 
 case class CoverageResult(result: String, details: Seq[(String, String)])
+
+object CoverageResultStatus extends Enumeration {
+  type CoverageResultStatus = Value
+
+  val Failed: Value = Value("FAILED")
+  val Passed: Value = Value("PASSED")
+  val Skipped: Value = Value("SKIPPED")
+}