From a668a8657a16496781075a014c6009d038c3fa1b Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Tue, 4 Jun 2024 11:09:32 -0600 Subject: [PATCH 1/6] feat: Add specific fuzz tests for cast and try_cast and fix NPE found during fuzz testing (#514) * Various improvements to fuzz testing tool * Fix NPE in QueryPlanSerde handling of trim expression * format --- fuzz-testing/README.md | 5 +++-- .../scala/org/apache/comet/fuzz/DataGen.scala | 3 ++- .../org/apache/comet/fuzz/QueryGen.scala | 20 +++++++++++++++++-- .../org/apache/comet/fuzz/QueryRunner.scala | 10 ++++++++-- .../scala/org/apache/comet/fuzz/Utils.scala | 4 ++-- .../apache/comet/serde/QueryPlanSerde.scala | 4 ++-- 6 files changed, 35 insertions(+), 11 deletions(-) diff --git a/fuzz-testing/README.md b/fuzz-testing/README.md index 56af359f2..076ff6aea 100644 --- a/fuzz-testing/README.md +++ b/fuzz-testing/README.md @@ -30,8 +30,8 @@ Comet Fuzz is inspired by the [SparkFuzz](https://ir.cwi.nl/pub/30222) paper fro Planned areas of improvement: +- ANSI mode - Support for all data types, expressions, and operators supported by Comet -- Explicit casts - Unary and binary arithmetic expressions - IF and CASE WHEN expressions - Complex (nested) expressions @@ -91,7 +91,8 @@ $SPARK_HOME/bin/spark-submit \ --conf spark.comet.exec.shuffle.enabled=true \ --conf spark.comet.exec.shuffle.mode=auto \ --jars $COMET_JAR \ - --driver-class-path $COMET_JAR \ + --conf spark.driver.extraClassPath=$COMET_JAR \ + --conf spark.executor.extraClassPath=$COMET_JAR \ --class org.apache.comet.fuzz.Main \ target/comet-fuzz-spark3.4_2.12-0.1.0-SNAPSHOT-jar-with-dependencies.jar \ run --num-files=2 --filename=queries.sql diff --git a/fuzz-testing/src/main/scala/org/apache/comet/fuzz/DataGen.scala b/fuzz-testing/src/main/scala/org/apache/comet/fuzz/DataGen.scala index 47a6bd879..9f9f772b7 100644 --- a/fuzz-testing/src/main/scala/org/apache/comet/fuzz/DataGen.scala +++ b/fuzz-testing/src/main/scala/org/apache/comet/fuzz/DataGen.scala @@ -50,7 +50,8 
@@ object DataGen { // generate schema using random data types val fields = Range(0, numColumns) - .map(i => StructField(s"c$i", Utils.randomWeightedChoice(Meta.dataTypes), nullable = true)) + .map(i => + StructField(s"c$i", Utils.randomWeightedChoice(Meta.dataTypes, r), nullable = true)) val schema = StructType(fields) // generate columnar data diff --git a/fuzz-testing/src/main/scala/org/apache/comet/fuzz/QueryGen.scala b/fuzz-testing/src/main/scala/org/apache/comet/fuzz/QueryGen.scala index 1daa26200..7584e76ce 100644 --- a/fuzz-testing/src/main/scala/org/apache/comet/fuzz/QueryGen.scala +++ b/fuzz-testing/src/main/scala/org/apache/comet/fuzz/QueryGen.scala @@ -42,10 +42,11 @@ object QueryGen { val uniqueQueries = mutable.HashSet[String]() for (_ <- 0 until numQueries) { - val sql = r.nextInt().abs % 3 match { + val sql = r.nextInt().abs % 4 match { case 0 => generateJoin(r, spark, numFiles) case 1 => generateAggregate(r, spark, numFiles) case 2 => generateScalar(r, spark, numFiles) + case 3 => generateCast(r, spark, numFiles) } if (!uniqueQueries.contains(sql)) { uniqueQueries += sql @@ -91,6 +92,21 @@ object QueryGen { s"ORDER BY ${args.mkString(", ")};" } + private def generateCast(r: Random, spark: SparkSession, numFiles: Int): String = { + val tableName = s"test${r.nextInt(numFiles)}" + val table = spark.table(tableName) + + val toType = Utils.randomWeightedChoice(Meta.dataTypes, r).sql + val arg = Utils.randomChoice(table.columns, r) + + // We test both `cast` and `try_cast` to cover LEGACY and TRY eval modes. It is not + // recommended to run Comet Fuzz with ANSI enabled currently. 
+ // Example SELECT c0, cast(c0 as float), try_cast(c0 as float) FROM test0 + s"SELECT $arg, cast($arg as $toType), try_cast($arg as $toType) " + + s"FROM $tableName " + + s"ORDER BY $arg;" + } + private def generateJoin(r: Random, spark: SparkSession, numFiles: Int): String = { val leftTableName = s"test${r.nextInt(numFiles)}" val rightTableName = s"test${r.nextInt(numFiles)}" @@ -101,7 +117,7 @@ object QueryGen { val rightCol = Utils.randomChoice(rightTable.columns, r) val joinTypes = Seq(("INNER", 0.4), ("LEFT", 0.3), ("RIGHT", 0.3)) - val joinType = Utils.randomWeightedChoice(joinTypes) + val joinType = Utils.randomWeightedChoice(joinTypes, r) val leftColProjection = leftTable.columns.map(c => s"l.$c").mkString(", ") val rightColProjection = rightTable.columns.map(c => s"r.$c").mkString(", ") diff --git a/fuzz-testing/src/main/scala/org/apache/comet/fuzz/QueryRunner.scala b/fuzz-testing/src/main/scala/org/apache/comet/fuzz/QueryRunner.scala index 49f9fc3bd..b2ceae9d0 100644 --- a/fuzz-testing/src/main/scala/org/apache/comet/fuzz/QueryRunner.scala +++ b/fuzz-testing/src/main/scala/org/apache/comet/fuzz/QueryRunner.scala @@ -19,7 +19,7 @@ package org.apache.comet.fuzz -import java.io.{BufferedWriter, FileWriter} +import java.io.{BufferedWriter, FileWriter, PrintWriter} import scala.io.Source @@ -109,7 +109,12 @@ object QueryRunner { case e: Exception => // the query worked in Spark but failed in Comet, so this is likely a bug in Comet showSQL(w, sql) - w.write(s"[ERROR] Query failed in Comet: ${e.getMessage}\n") + w.write(s"[ERROR] Query failed in Comet: ${e.getMessage}:\n") + w.write("```\n") + val p = new PrintWriter(w) + e.printStackTrace(p) + p.close() + w.write("```\n") } // flush after every query so that results are saved in the event of the driver crashing @@ -134,6 +139,7 @@ object QueryRunner { private def formatRow(row: Row): String = { row.toSeq .map { + case null => "NULL" case v: Array[Byte] => v.mkString case other => other.toString } diff --git 
a/fuzz-testing/src/main/scala/org/apache/comet/fuzz/Utils.scala b/fuzz-testing/src/main/scala/org/apache/comet/fuzz/Utils.scala index 19f9695a9..4d51c60e5 100644 --- a/fuzz-testing/src/main/scala/org/apache/comet/fuzz/Utils.scala +++ b/fuzz-testing/src/main/scala/org/apache/comet/fuzz/Utils.scala @@ -27,9 +27,9 @@ object Utils { list(r.nextInt(list.length)) } - def randomWeightedChoice[T](valuesWithWeights: Seq[(T, Double)]): T = { + def randomWeightedChoice[T](valuesWithWeights: Seq[(T, Double)], r: Random): T = { val totalWeight = valuesWithWeights.map(_._2).sum - val randomValue = Random.nextDouble() * totalWeight + val randomValue = r.nextDouble() * totalWeight var cumulativeWeight = 0.0 for ((value, weight) <- valuesWithWeights) { diff --git a/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala b/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala index 439ec4ebb..8d81b57c4 100644 --- a/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala +++ b/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala @@ -2169,10 +2169,10 @@ object QueryPlanSerde extends Logging with ShimQueryPlanSerde with CometExprShim val trimCast = Cast(trimStr.get, StringType) val trimExpr = exprToProtoInternal(trimCast, inputs) val optExpr = scalarExprToProto(trimType, srcExpr, trimExpr) - optExprWithInfo(optExpr, expr, null, srcCast, trimCast) + optExprWithInfo(optExpr, expr, srcCast, trimCast) } else { val optExpr = scalarExprToProto(trimType, srcExpr) - optExprWithInfo(optExpr, expr, null, srcCast) + optExprWithInfo(optExpr, expr, srcCast) } } From b3ba82f54e2a35fffbde0e5c21b51822e3a2fb51 Mon Sep 17 00:00:00 2001 From: Oleks V Date: Wed, 5 Jun 2024 08:00:55 -0700 Subject: [PATCH 2/6] Minor: Generate the supported Spark builtin expression list into MD file (#455) * Coverage: Add a manual test for DF supporting Spark expressions directly Co-authored-by: advancedxy Co-authored-by: Andy Grove --- docs/source/user-guide/overview.md | 2 +- 
docs/spark_builtin_expr_coverage.txt | 836 +++++++++--------- docs/spark_expressions_support.md | 475 ++++++++++ .../comet/CometExpressionCoverageSuite.scala | 303 ++++++- .../org/apache/spark/sql/CometTestBase.scala | 6 +- 5 files changed, 1172 insertions(+), 450 deletions(-) create mode 100644 docs/spark_expressions_support.md diff --git a/docs/source/user-guide/overview.md b/docs/source/user-guide/overview.md index ff73176d8..b5425d774 100644 --- a/docs/source/user-guide/overview.md +++ b/docs/source/user-guide/overview.md @@ -29,7 +29,7 @@ Comet aims to support: - a native Parquet implementation, including both reader and writer - full implementation of Spark operators, including Filter/Project/Aggregation/Join/Exchange etc. -- full implementation of Spark built-in expressions +- full implementation of Spark built-in expressions. - a UDF framework for users to migrate their existing UDF to native ## Architecture diff --git a/docs/spark_builtin_expr_coverage.txt b/docs/spark_builtin_expr_coverage.txt index 7486e6380..8e71cb8d2 100644 --- a/docs/spark_builtin_expr_coverage.txt +++ b/docs/spark_builtin_expr_coverage.txt @@ -1,419 +1,417 @@ 
-+---------------------------+-------------------------------------------------------------------------------------------------------------------------------------------------------+-------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -|name |query |result |cometMessage |datafusionMessage | -+---------------------------+-------------------------------------------------------------------------------------------------------------------------------------------------------+-------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -|! |SELECT ! 
true; |PASSED |OK |std_err: SQL error: ParserError("Expected an expression:, found: !")\n | -|% |SELECT 2 % 1.8; |PASSED |OK |OK | -|& |SELECT 3 & 5; |PASSED |OK |OK | -|* |SELECT 2 * 3; |PASSED |OK |OK | -|+ |SELECT 1 + 2; |PASSED |OK |OK | -|- |SELECT 2 - 1; |PASSED |OK |OK | -|/ |SELECT 3 / 2; |PASSED |OK |OK | -|< |SELECT 1 < 2; |PASSED |OK |OK | -|<= |SELECT 2 <= 2; |PASSED |OK |OK | -|<=> |SELECT 2 <=> 2; |PASSED |OK |std_err: This feature is not implemented: Unsupported SQL binary operator Spaceship\n | -|= |SELECT 2 = 2; |PASSED |OK |OK | -|== |SELECT 2 == 2; |PASSED |OK |OK | -|> |SELECT 2 > 1; |PASSED |OK |OK | -|>= |SELECT 2 >= 1; |PASSED |OK |OK | -|^ |SELECT 3 ^ 5; |PASSED |OK |OK | -|abs |SELECT abs(-1); |PASSED |OK |OK | -|acos |SELECT acos(1); |PASSED |OK |OK | -|acosh |SELECT acosh(1); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback | | -|add_months |SELECT add_months('2016-08-31', 1); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'add_months'.\nDid you mean 'acos'?\n | -|aes_decrypt |SELECT aes_decrypt(unhex('83F16B2AA704794132802D248E6BFD4E380078182D1544813898AC97E709B28A94'), '0000111122223333'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'aes_decrypt'.\nDid you mean 'list_except'?\n | -|aes_encrypt |SELECT hex(aes_encrypt('Spark', '0000111122223333')); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'hex'.\nDid you mean 'exp'?\n | -|aggregate |SELECT aggregate(array(1, 2, 3), 0, (acc, x) -> acc + x); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: SQL error: ParserError("Expected SELECT, VALUES, or a subquery in the query body, found: 1")\n | -|and |SELECT true and true; |PASSED |OK |OK | -|any |SELECT 
any(col) FROM VALUES (true), (false), (false) AS tab(col); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_out: DataFusion CLI v37.1.0\nError: Error during planning: Invalid function 'any'.\nDid you mean 'abs'?\n | -|any_value |SELECT any_value(col) FROM VALUES (10), (5), (20) AS tab(col); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_out: DataFusion CLI v37.1.0\nError: Error during planning: Invalid function 'any_value'.\nDid you mean 'LAST_VALUE'?\n | -|approx_count_distinct |SELECT approx_count_distinct(col1) FROM VALUES (1), (1), (2), (2), (3) tab(col1); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_out: DataFusion CLI v37.1.0\nError: Error during planning: Invalid function 'approx_count_distinct'.\nDid you mean 'APPROX_DISTINCT'?\n | -|approx_percentile |SELECT approx_percentile(col, array(0.5, 0.4, 0.1), 100) FROM VALUES (0), (1), (2), (10) AS tab(col); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_out: DataFusion CLI v37.1.0\nError: SQL error: ParserError("Expected SELECT, VALUES, or a subquery in the query body, found: 0.5")\n | -|array |SELECT array(1, 2, 3); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: SQL error: ParserError("Expected SELECT, VALUES, or a subquery in the query body, found: 1")\n | -|array_agg |SELECT array_agg(col) FROM VALUES (1), (2), (1) AS tab(col); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback | | -|array_append |SELECT array_append(array('b', 'd', 'c', 'a'), 'd'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: SQL error: ParserError("Expected SELECT, VALUES, or a subquery in the query body, found: 'b'")\n | -|array_compact |SELECT array_compact(array(1, 2, 3, null)); |FAILED |Unsupported: Expected only Comet native operators but found 
Spark fallback |std_err: SQL error: ParserError("Expected SELECT, VALUES, or a subquery in the query body, found: 1")\n | -|array_contains |SELECT array_contains(array(1, 2, 3), 2); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: SQL error: ParserError("Expected SELECT, VALUES, or a subquery in the query body, found: 1")\n | -|array_distinct |SELECT array_distinct(array(1, 2, 3, null, 3)); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: SQL error: ParserError("Expected SELECT, VALUES, or a subquery in the query body, found: 1")\n | -|array_except |SELECT array_except(array(1, 2, 3), array(1, 3, 5)); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: SQL error: ParserError("Expected SELECT, VALUES, or a subquery in the query body, found: 1")\n | -|array_insert |SELECT array_insert(array(1, 2, 3, 4), 5, 5); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: SQL error: ParserError("Expected SELECT, VALUES, or a subquery in the query body, found: 1")\n | -|array_intersect |SELECT array_intersect(array(1, 2, 3), array(1, 3, 5)); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: SQL error: ParserError("Expected SELECT, VALUES, or a subquery in the query body, found: 1")\n | -|array_join |SELECT array_join(array('hello', 'world'), ' '); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: SQL error: ParserError("Expected SELECT, VALUES, or a subquery in the query body, found: 'hello'")\n | -|array_max |SELECT array_max(array(1, 20, null, 3)); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: SQL error: ParserError("Expected SELECT, VALUES, or a subquery in the query body, found: 1")\n | -|array_min |SELECT array_min(array(1, 20, null, 3)); |FAILED |Unsupported: Expected 
only Comet native operators but found Spark fallback |std_err: SQL error: ParserError("Expected SELECT, VALUES, or a subquery in the query body, found: 1")\n | -|array_position |SELECT array_position(array(3, 2, 1), 1); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: SQL error: ParserError("Expected SELECT, VALUES, or a subquery in the query body, found: 3")\n | -|array_remove |SELECT array_remove(array(1, 2, 3, null, 3), 3); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: SQL error: ParserError("Expected SELECT, VALUES, or a subquery in the query body, found: 1")\n | -|array_repeat |SELECT array_repeat('123', 2); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback | | -|array_size |SELECT array_size(array('b', 'd', 'c', 'a')); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: SQL error: ParserError("Expected SELECT, VALUES, or a subquery in the query body, found: 'b'")\n | -|array_sort |SELECT array_sort(array(5, 6, 1), (left, right) -> case when left < right then -1 when left > right then 1 else 0 end); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: SQL error: ParserError("Expected SELECT, VALUES, or a subquery in the query body, found: 5")\n | -|array_union |SELECT array_union(array(1, 2, 3), array(1, 3, 5)); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: SQL error: ParserError("Expected SELECT, VALUES, or a subquery in the query body, found: 1")\n | -|arrays_overlap |SELECT arrays_overlap(array(1, 2, 3), array(3, 4, 5)); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: SQL error: ParserError("Expected SELECT, VALUES, or a subquery in the query body, found: 1")\n | -|arrays_zip |SELECT arrays_zip(array(1, 2, 3), array(2, 3, 4)); |FAILED |Unsupported: Expected 
only Comet native operators but found Spark fallback |std_err: SQL error: ParserError("Expected SELECT, VALUES, or a subquery in the query body, found: 1")\n | -|ascii |SELECT ascii('222'); |PASSED |OK |OK | -|asin |SELECT asin(0); |PASSED |OK |OK | -|asinh |SELECT asinh(0); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback | | -|assert_true |SELECT assert_true(0 < 1); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'assert_true'.\nDid you mean 'date_trunc'?\n | -|atan |SELECT atan(0); |PASSED |OK |OK | -|atan2 |SELECT atan2(0, 0); |PASSED |OK |OK | -|atanh |SELECT atanh(0); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback | | -|avg |SELECT avg(col) FROM VALUES (1), (2), (3) AS tab(col); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback | | -|base64 |SELECT base64('Spark SQL'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'base64'.\nDid you mean 'asinh'?\n | -|bigint | |SKIPPED|No examples found in spark.sessionState.functionRegistry | | -|bin |SELECT bin(13); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'bin'.\nDid you mean 'sin'?\n | -|binary | |SKIPPED|No examples found in spark.sessionState.functionRegistry | | -|bit_and |SELECT bit_and(col) FROM VALUES (3), (5) AS tab(col); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback | | -|bit_count |SELECT bit_count(0); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'bit_count'.\nDid you mean 'COUNT'?\n | -|bit_get |SELECT bit_get(11, 0); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error 
during planning: Invalid function 'bit_get'.\nDid you mean 'BIT_AND'?\n | -|bit_length |SELECT bit_length('Spark SQL'); |PASSED |OK |OK | -|bit_or |SELECT bit_or(col) FROM VALUES (3), (5) AS tab(col); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback | | -|bit_xor |SELECT bit_xor(col) FROM VALUES (3), (5) AS tab(col); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback | | -|bool_and |SELECT bool_and(col) FROM VALUES (true), (true), (true) AS tab(col); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback | | -|bool_or |SELECT bool_or(col) FROM VALUES (true), (false), (false) AS tab(col); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback | | -|boolean | |SKIPPED|No examples found in spark.sessionState.functionRegistry | | -|bround |SELECT bround(2.5, 0); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'bround'.\nDid you mean 'round'?\n | -|btrim |SELECT btrim(' SparkSQL '); |PASSED |OK |OK | -|cardinality |SELECT cardinality(array('b', 'd', 'c', 'a')); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: SQL error: ParserError("Expected SELECT, VALUES, or a subquery in the query body, found: 'b'")\n | -|cast |SELECT cast('10' as int); |PASSED |OK |OK | -|cbrt |SELECT cbrt(27.0); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback | | -|ceil |SELECT ceil(-0.1); |PASSED |OK |OK | -|ceiling |SELECT ceiling(-0.1); |PASSED |OK |std_err: Error during planning: Invalid function 'ceiling'.\nDid you mean 'ceil'?\n | -|char |SELECT char(65); |PASSED |OK |std_err: Error during planning: Invalid function 'char'.\nDid you mean 'chr'?\n | -|char_length |SELECT char_length('Spark SQL '); |PASSED |OK |OK | -|character_length |SELECT character_length('Spark SQL '); |PASSED |OK |OK | -|chr |SELECT 
chr(65); |PASSED |OK |OK | -|coalesce |SELECT coalesce(NULL, 1, NULL); |PASSED |OK |OK | -|collect_list |SELECT collect_list(col) FROM VALUES (1), (2), (1) AS tab(col); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_out: DataFusion CLI v37.1.0\nError: Error during planning: Invalid function 'collect_list'.\nDid you mean 'make_list'?\n | -|collect_set |SELECT collect_set(col) FROM VALUES (1), (2), (1) AS tab(col); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_out: DataFusion CLI v37.1.0\nError: Error during planning: Invalid function 'collect_set'.\nDid you mean 'coalesce'?\n | -|concat |SELECT concat('Spark', 'SQL'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback | | -|concat_ws |SELECT concat_ws(' ', 'Spark', 'SQL'); |PASSED |OK |OK | -|contains |SELECT contains('Spark SQL', 'Spark'); |FAILED |Failed on native side: found CometNativeException |std_err: Error during planning: Invalid function 'contains'.\nDid you mean 'concat_ws'?\n | -|conv |SELECT conv('100', 2, 10); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'conv'.\nDid you mean 'cot'?\n | -|convert_timezone |SELECT convert_timezone('Europe/Brussels', 'America/Los_Angeles', timestamp_ntz'2021-12-06 00:00:00'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: SQL error: ParserError("Expected ), found: '2021-12-06 00:00:00'")\n | -|corr |SELECT corr(c1, c2) FROM VALUES (3, 2), (3, 3), (6, 4) as tab(c1, c2); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback | | -|cos |SELECT cos(0); |PASSED |OK |OK | -|cosh |SELECT cosh(0); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback | | -|cot |SELECT cot(1); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback | | -|count 
|SELECT count(*) FROM VALUES (NULL), (5), (5), (20) AS tab(col); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback | | -|count_if |SELECT count_if(col % 2 = 0) FROM VALUES (NULL), (0), (1), (2), (3) AS tab(col); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_out: DataFusion CLI v37.1.0\nError: Error during planning: Invalid function 'count_if'.\nDid you mean 'COUNT'?\n | -|count_min_sketch |SELECT hex(count_min_sketch(col, 0.5d, 0.5d, 1)) FROM VALUES (1), (2), (1) AS tab(col); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_out: DataFusion CLI v37.1.0\nError: SQL error: ParserError("Expected ), found: d")\n | -|covar_pop |SELECT covar_pop(c1, c2) FROM VALUES (1,1), (2,2), (3,3) AS tab(c1, c2); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback | | -|covar_samp |SELECT covar_samp(c1, c2) FROM VALUES (1,1), (2,2), (3,3) AS tab(c1, c2); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback | | -|crc32 |SELECT crc32('Spark'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'crc32'.\nDid you mean 'ascii'?\n | -|csc |SELECT csc(1); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'csc'.\nDid you mean 'chr'?\n | -|cume_dist |SELECT a, b, cume_dist() OVER (PARTITION BY a ORDER BY b) FROM VALUES ('A1', 2), ('A1', 1), ('A2', 3), ('A1', 1) tab(a, b); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback | | -|curdate |SELECT curdate(); |PASSED |OK |std_err: Error during planning: Invalid function 'curdate'.\nDid you mean 'to_date'?\n | -|current_catalog |SELECT current_catalog(); |PASSED |OK |std_err: SQL error: ParserError("Expected end of statement, found: (")\n | -|current_database |SELECT 
current_database(); |PASSED |OK |std_err: Error during planning: Invalid function 'current_database'.\nDid you mean 'current_date'?\n | -|current_date |SELECT current_date(); |PASSED |OK |OK | -|current_schema |SELECT current_schema(); |PASSED |OK |std_err: Error during planning: Invalid function 'current_schema'.\nDid you mean 'current_time'?\n | -|current_timestamp |SELECT current_timestamp(); |FAILED |\nResults do not match for query:\nTimezone: sun.util.calendar.ZoneInfo[id="America/Los_Angeles",offset=-28800000,dstSavings=3600000,useDaylight=true,transitions=185,lastRule=java.util.SimpleTimeZone[id=America/Los_Angeles,offset=-28800000,dstSavings=3600000,useDaylight=true,startYear=0,startMode=3,startMonth=2,startDay=8,startDayOfWeek=1,startTime=7200000,startTimeMode=0,endMode=3,endMonth=10,endDay=1,endDayOfWeek=1,endTime=7200000,endTimeMode=0]]\nTimezone Env: \n\n== Parsed Logical Plan ==\nProject [current_timestamp() AS current_timestamp()#3031, x#3015]\n+- SubqueryAlias tbl\n +- View (`tbl`, [x#3015])\n +- Relation [x#3015] parquet\n\n== Analyzed Logical Plan ==\ncurrent_timestamp(): timestamp, x: string\nProject [current_timestamp() AS current_timestamp()#3031, x#3015]\n+- SubqueryAlias tbl\n +- View (`tbl`, [x#3015])\n +- Relation [x#3015] parquet\n\n== Optimized Logical Plan ==\nProject [2024-05-10 10:13:21.77322 AS current_timestamp()#3031, x#3015]\n+- Relation [x#3015] parquet\n\n== Physical Plan ==\n*(1) ColumnarToRow\n+- CometProject [current_timestamp()#3031, x#3015], [2024-05-10 10:13:21.77322 AS current_timestamp()#3031, x#3015]\n +- CometScan parquet [x#3015] Batched: true, DataFilters: [], Format: CometParquet, Location: InMemoryFileIndex(1 paths)[file:/private/var/folders/6f/_s1vnnd55zgfkx7zlwnrnv0h0000gn/T/spark-62..., PartitionFilters: [], PushedFilters: [], ReadSchema: struct\n\n== Results ==\n\n== Results ==\n!== Correct Answer - 1 == == Spark Answer - 1 ==\n struct struct\n![2024-05-10 10:13:21.749547,dummy] [2024-05-10 
10:13:21.77322,dummy]\n \n |std_err: Error during planning: Invalid function 'current_timestamp'.\nDid you mean 'current_time'?\n | -|current_timezone |SELECT current_timezone(); |PASSED |OK |std_err: Error during planning: Invalid function 'current_timezone'.\nDid you mean 'current_time'?\n | -|current_user |SELECT current_user(); |PASSED |OK |std_err: SQL error: ParserError("Expected end of statement, found: (")\n | -|date | |SKIPPED|No examples found in spark.sessionState.functionRegistry | | -|date_add |SELECT date_add('2016-07-30', 1); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'date_add'.\nDid you mean 'date_bin'?\n | -|date_diff |SELECT date_diff('2009-07-31', '2009-07-30'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'date_diff'.\nDid you mean 'date_bin'?\n | -|date_format |SELECT date_format('2016-04-08', 'y'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback | | -|date_from_unix_date |SELECT date_from_unix_date(1); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'date_from_unix_date'.\nDid you mean 'from_unixtime'?\n | -|date_part |SELECT date_part('YEAR', TIMESTAMP '2019-08-12 01:00:00.123456'); |PASSED |OK |OK | -|date_sub |SELECT date_sub('2016-07-30', 1); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'date_sub'.\nDid you mean 'date_bin'?\n | -|date_trunc |SELECT date_trunc('YEAR', '2015-03-05T09:32:05.359'); |FAILED |Failed on native side: found CometNativeException | | -|dateadd |SELECT dateadd('2016-07-30', 1); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'dateadd'.\nDid you mean 
'datepart'?\n | -|datediff |SELECT datediff('2009-07-31', '2009-07-30'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'datediff'.\nDid you mean 'datepart'?\n | -|datepart |SELECT datepart('YEAR', TIMESTAMP '2019-08-12 01:00:00.123456'); |PASSED |OK |OK | -|day |SELECT day('2009-07-30'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'day'.\nDid you mean 'today'?\n | -|dayofmonth |SELECT dayofmonth('2009-07-30'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'dayofmonth'.\nDid you mean 'datepart'?\n | -|dayofweek |SELECT dayofweek('2009-07-30'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'dayofweek'.\nDid you mean 'degrees'?\n | -|dayofyear |SELECT dayofyear('2016-04-09'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'dayofyear'.\nDid you mean 'to_char'?\n | -|decimal | |SKIPPED|No examples found in spark.sessionState.functionRegistry | | -|decode |SELECT decode(encode('abc', 'utf-8'), 'utf-8'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: There is no built-in encoding named 'utf-8', currently supported encodings are: base64, hex\n | -|degrees |SELECT degrees(3.141592653589793); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback | | -|dense_rank |SELECT a, b, dense_rank(b) OVER (PARTITION BY a ORDER BY b) FROM VALUES ('A1', 2), ('A1', 1), ('A2', 3), ('A1', 1) tab(a, b); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_out: DataFusion CLI v37.1.0\nError: Error during planning: No function 
matches the given name and argument types 'DENSE_RANK(Int32)'. You might need to add explicit type casts.\n\tCandidate functions:\n\tDENSE_RANK()\n | -|div |SELECT 3 div 2; |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: SQL error: ParserError("No infix parser for token Word(Word { value: \"div\", quote_style: None, keyword: DIV })")\n | -|double | |SKIPPED|No examples found in spark.sessionState.functionRegistry | | -|e |SELECT e(); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'e'.\nDid you mean 'exp'?\n | -|element_at |SELECT element_at(array(1, 2, 3), 2); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: SQL error: ParserError("Expected SELECT, VALUES, or a subquery in the query body, found: 1")\n | -|elt |SELECT elt(1, 'scala', 'java'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'elt'.\nDid you mean 'ln'?\n | -|encode |SELECT encode('abc', 'utf-8'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: There is no built-in encoding named 'utf-8', currently supported encodings are: base64, hex\n | -|endswith |SELECT endswith('Spark SQL', 'SQL'); |FAILED |Failed on native side: found CometNativeException |std_err: Error during planning: Invalid function 'endswith'.\nDid you mean 'ends_with'?\n | -|equal_null |SELECT equal_null(3, 3); |PASSED |OK |std_err: Error during planning: Invalid function 'equal_null'.\nDid you mean 'ifnull'?\n | -|every |SELECT every(col) FROM VALUES (true), (true), (true) AS tab(col); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_out: DataFusion CLI v37.1.0\nError: Error during planning: Invalid function 'every'.\nDid you mean 'reverse'?\n | -|exists |SELECT 
exists(array(1, 2, 3), x -> x % 2 == 0); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: SQL error: ParserError("Expected SELECT, VALUES, or a subquery in the query body, found: array")\n | -|exp |SELECT exp(0); |PASSED |OK |OK | -|explode |SELECT explode(array(10, 20)); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: SQL error: ParserError("Expected SELECT, VALUES, or a subquery in the query body, found: 10")\n | -|explode_outer |SELECT explode_outer(array(10, 20)); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: SQL error: ParserError("Expected SELECT, VALUES, or a subquery in the query body, found: 10")\n | -|expm1 |SELECT expm1(0); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'expm1'.\nDid you mean 'exp'?\n | -|extract |SELECT extract(YEAR FROM TIMESTAMP '2019-08-12 01:00:00.123456'); |PASSED |OK |OK | -|factorial |SELECT factorial(5); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback | | -|filter |SELECT filter(array(1, 2, 3), x -> x % 2 == 1); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: SQL error: ParserError("Expected SELECT, VALUES, or a subquery in the query body, found: 1")\n | -|find_in_set |SELECT find_in_set('ab','abc,b,ab,c,def'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback | | -|first |SELECT first(col) FROM VALUES (10), (5), (20) AS tab(col); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_out: DataFusion CLI v37.1.0\nError: Error during planning: Invalid function 'first'.\nDid you mean 'right'?\n | -|first_value |SELECT first_value(col) FROM VALUES (10), (5), (20) AS tab(col); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback | | 
-|flatten |SELECT flatten(array(array(1, 2), array(3, 4))); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: SQL error: ParserError("Expected SELECT, VALUES, or a subquery in the query body, found: array")\n | -|float | |SKIPPED|No examples found in spark.sessionState.functionRegistry | | -|floor |SELECT floor(-0.1); |PASSED |OK |OK | -|forall |SELECT forall(array(1, 2, 3), x -> x % 2 == 0); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: SQL error: ParserError("Expected SELECT, VALUES, or a subquery in the query body, found: 1")\n | -|format_number |SELECT format_number(12332.123456, 4); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'format_number'.\nDid you mean 'FIRST_VALUE'?\n | -|format_string |SELECT format_string("Hello World %d %s", 100, "days"); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'format_string'.\nDid you mean 'array_to_string'?\n | -|from_csv |SELECT from_csv('1, 0.8', 'a INT, b DOUBLE'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'from_csv'.\nDid you mean 'arrow_cast'?\n | -|from_json |SELECT from_json('{"a":1, "b":0.8}', 'a INT, b DOUBLE'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'from_json'.\nDid you mean 'floor'?\n | -|from_unixtime |SELECT from_unixtime(0, 'yyyy-MM-dd HH:mm:ss'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: No function matches the given name and argument types 'from_unixtime(Int64, Utf8)'. 
You might need to add explicit type casts.\n\tCandidate functions:\n\tfrom_unixtime(Int64)\n | -|from_utc_timestamp |SELECT from_utc_timestamp('2016-08-31', 'Asia/Seoul'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'from_utc_timestamp'.\nDid you mean 'to_timestamp'?\n | -|get |SELECT get(array(1, 2, 3), 0); |PASSED |OK |std_err: SQL error: ParserError("Expected SELECT, VALUES, or a subquery in the query body, found: 1")\n | -|get_json_object |SELECT get_json_object('{"a":"b"}', '$.a'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'get_json_object'.\nDid you mean 'list_pop_back'?\n | -|getbit |SELECT getbit(11, 0); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'getbit'.\nDid you mean 'ceil'?\n | -|greatest |SELECT greatest(10, 9, 2, 4, 3); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'greatest'.\nDid you mean 'repeat'?\n | -|grouping |SELECT name, grouping(name), sum(age) FROM VALUES (2, 'Alice'), (5, 'Bob') people(age, name) GROUP BY cube(name); |FAILED |[MISSING_AGGREGATION] The non-aggregating expression "age" is based on columns which are not participating in the GROUP BY clause.\nAdd the columns or the expression to the GROUP BY, aggregate the expression, or use "any_value(age)" if you do not care which of the values within a group is returned.;\nAggregate [name#2277, spark_grouping_id#2276L], [age#2273, name#2277]\n+- Expand [[age#2273, name#2274, name#2275, 0], [age#2273, name#2274, null, 1]], [age#2273, name#2274, name#2277, spark_grouping_id#2276L]\n +- Project [age#2273, name#2274, name#2274 AS name#2275]\n +- SubqueryAlias people\n +- LocalRelation [age#2273, name#2274]\n | | -|grouping_id |SELECT name, 
grouping_id(), sum(age), avg(height) FROM VALUES (2, 'Alice', 165), (5, 'Bob', 180) people(age, name, height) GROUP BY cube(name, height);|FAILED |[MISSING_AGGREGATION] The non-aggregating expression "age" is based on columns which are not participating in the GROUP BY clause.\nAdd the columns or the expression to the GROUP BY, aggregate the expression, or use "any_value(age)" if you do not care which of the values within a group is returned.;\nAggregate [name#7432, height#7433, spark_grouping_id#7431L], [age#7426, name#7432, height#7433]\n+- Expand [[age#7426, name#7427, height#7428, name#7429, height#7430, 0], [age#7426, name#7427, height#7428, name#7429, null, 1], [age#7426, name#7427, height#7428, null, height#7430, 2], [age#7426, name#7427, height#7428, null, null, 3]], [age#7426, name#7427, height#7428, name#7432, height#7433, spark_grouping_id#7431L]\n +- Project [age#7426, name#7427, height#7428, name#7427 AS name#7429, height#7428 AS height#7430]\n +- SubqueryAlias people\n +- LocalRelation [age#7426, name#7427, height#7428]\n | | -|hash |SELECT hash('Spark', array(123), 2); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: SQL error: ParserError("Expected SELECT, VALUES, or a subquery in the query body, found: 123")\n | -|hex |SELECT hex(17); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'hex'.\nDid you mean 'exp'?\n | -|histogram_numeric |SELECT histogram_numeric(col, 5) FROM VALUES (0), (1), (2), (10) AS tab(col); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_out: DataFusion CLI v37.1.0\nError: Error during planning: Invalid function 'histogram_numeric'.\nDid you mean 'list_remove_n'?\n | -|hour |SELECT hour('2009-07-30 12:58:59'); |FAILED |Failed on native side: found CometNativeException |std_err: Error during planning: Invalid function 'hour'.\nDid you mean 'CORR'?\n | -|hypot 
|SELECT hypot(3, 4); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'hypot'.\nDid you mean 'pow'?\n | -|if |SELECT if(1 < 2, 'a', 'b'); |PASSED |OK |std_err: Error during planning: Invalid function 'if'.\nDid you mean 'sin'?\n | -|ifnull |SELECT ifnull(NULL, array('2')); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: SQL error: ParserError("Expected SELECT, VALUES, or a subquery in the query body, found: '2'")\n | -|ilike |SELECT ilike('Spark', '_Park'); |FAILED |Failed on native side: found CometNativeException |std_err: Error during planning: Invalid function 'ilike'.\nDid you mean 'lpad'?\n | -|in |SELECT 1 in(1, 2, 3); |PASSED |OK |OK | -|initcap |SELECT initcap('sPark sql'); |PASSED |OK |OK | -|inline |SELECT inline(array(struct(1, 'a'), struct(2, 'b'))); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: SQL error: ParserError("Expected SELECT, VALUES, or a subquery in the query body, found: struct")\n | -|inline_outer |SELECT inline_outer(array(struct(1, 'a'), struct(2, 'b'))); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: SQL error: ParserError("Expected SELECT, VALUES, or a subquery in the query body, found: struct")\n | -|input_file_block_length |SELECT input_file_block_length(); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'input_file_block_length'.\nDid you mean 'octet_length'?\n | -|input_file_block_start |SELECT input_file_block_start(); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'input_file_block_start'.\nDid you mean 'list_replace_all'?\n | -|input_file_name |SELECT input_file_name(); |FAILED |Unsupported: Expected only Comet native operators but found 
Spark fallback |std_err: Error during planning: Invalid function 'input_file_name'.\nDid you mean 'bit_length'?\n | -|instr |SELECT instr('SparkSQL', 'SQL'); |PASSED |OK |OK | -|int | |SKIPPED|No examples found in spark.sessionState.functionRegistry | | -|isnan |SELECT isnan(cast('NaN' as double)); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback | | -|isnotnull |SELECT isnotnull(1); |PASSED |OK |std_err: Error during planning: Invalid function 'isnotnull'.\nDid you mean 'ifnull'?\n | -|isnull |SELECT isnull(1); |PASSED |OK |std_err: Error during planning: Invalid function 'isnull'.\nDid you mean 'ifnull'?\n | -|java_method |SELECT java_method('java.util.UUID', 'randomUUID'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'java_method'.\nDid you mean 'make_date'?\n | -|json_array_length |SELECT json_array_length('[1,2,3,4]'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'json_array_length'.\nDid you mean 'array_length'?\n | -|json_object_keys |SELECT json_object_keys('{}'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'json_object_keys'.\nDid you mean 'concat_ws'?\n | -|json_tuple |SELECT json_tuple('{"a":1, "b":2}', 'a', 'b'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'json_tuple'.\nDid you mean 'strpos'?\n | -|kurtosis |SELECT kurtosis(col) FROM VALUES (-10), (-20), (100), (1000) AS tab(col); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_out: DataFusion CLI v37.1.0\nError: Error during planning: Invalid function 'kurtosis'.\nDid you mean 'rtrim'?\n | -|lag |SELECT a, b, lag(b) OVER (PARTITION BY a ORDER BY b) FROM VALUES ('A1', 2), 
('A1', 1), ('A2', 3), ('A1', 1) tab(a, b); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback | | -|last |SELECT last(col) FROM VALUES (10), (5), (20) AS tab(col); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_out: DataFusion CLI v37.1.0\nError: Error during planning: Invalid function 'last'.\nDid you mean 'left'?\n | -|last_day |SELECT last_day('2009-01-12'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'last_day'.\nDid you mean 'list_cat'?\n | -|last_value |SELECT last_value(col) FROM VALUES (10), (5), (20) AS tab(col); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback | | -|lcase |SELECT lcase('SparkSql'); |PASSED |OK |std_err: Error during planning: Invalid function 'lcase'.\nDid you mean 'acos'?\n | -|lead |SELECT a, b, lead(b) OVER (PARTITION BY a ORDER BY b) FROM VALUES ('A1', 2), ('A1', 1), ('A2', 3), ('A1', 1) tab(a, b); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback | | -|least |SELECT least(10, 9, 2, 4, 3); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'least'.\nDid you mean 'left'?\n | -|left |SELECT left('Spark SQL', 3); |FAILED |Failed on native side: found CometNativeException | | -|len |SELECT len('Spark SQL '); |PASSED |OK |std_err: Error during planning: Invalid function 'len'.\nDid you mean 'ln'?\n | -|length |SELECT length('Spark SQL '); |PASSED |OK |OK | -|levenshtein |SELECT levenshtein('kitten', 'sitting'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback | | -|like |SELECT like('Spark', '_park'); |FAILED |Failed on native side: found CometNativeException |std_err: Error during planning: Invalid function 'like'.\nDid you mean 'lower'?\n | -|ln |SELECT ln(1); |PASSED |OK |OK | 
-|localtimestamp |SELECT localtimestamp(); |FAILED |Failed on native side: found CometNativeException |std_err: Error during planning: Invalid function 'localtimestamp'.\nDid you mean 'to_timestamp'?\n | -|locate |SELECT locate('bar', 'foobarbar'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'locate'.\nDid you mean 'to_date'?\n | -|log |SELECT log(10, 100); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback | | -|log10 |SELECT log10(10); |PASSED |OK |OK | -|log1p |SELECT log1p(0); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'log1p'.\nDid you mean 'log10'?\n | -|log2 |SELECT log2(2); |PASSED |OK |OK | -|lower |SELECT lower('SparkSql'); |PASSED |OK |OK | -|lpad |SELECT lpad('hi', 5, '??'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback | | -|ltrim |SELECT ltrim(' SparkSQL '); |PASSED |OK |OK | -|make_date |SELECT make_date(2013, 7, 15); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback | | -|make_dt_interval |SELECT make_dt_interval(1, 12, 30, 01.001001); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'make_dt_interval'.\nDid you mean 'make_date'?\n | -|make_interval |SELECT make_interval(100, 11, 1, 1, 12, 30, 01.001001); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'make_interval'.\nDid you mean 'make_array'?\n | -|make_timestamp |SELECT make_timestamp(2014, 12, 28, 6, 30, 45.887); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'make_timestamp'.\nDid you mean 'to_timestamp'?\n | -|make_timestamp_ltz |SELECT 
make_timestamp_ltz(2014, 12, 28, 6, 30, 45.887); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'make_timestamp_ltz'.\nDid you mean 'to_timestamp'?\n | -|make_timestamp_ntz |SELECT make_timestamp_ntz(2014, 12, 28, 6, 30, 45.887); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'make_timestamp_ntz'.\nDid you mean 'to_timestamp'?\n | -|make_ym_interval |SELECT make_ym_interval(1, 2); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'make_ym_interval'.\nDid you mean 'array_intersect'?\n | -|map |SELECT map(1.0, '2', 3.0, '4'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'map'.\nDid you mean 'MAX'?\n | -|map_concat |SELECT map_concat(map(1, 'a', 2, 'b'), map(3, 'c')); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'map_concat'.\nDid you mean 'array_concat'?\n | -|map_contains_key |SELECT map_contains_key(map(1, 'a', 2, 'b'), 1); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'map_contains_key'.\nDid you mean 'array_contains'?\n | -|map_entries |SELECT map_entries(map(1, 'a', 2, 'b')); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'map_entries'.\nDid you mean 'make_array'?\n | -|map_filter |SELECT map_filter(map(1, 0, 2, 2, 3, -1), (k, v) -> k > v); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'map_filter'.\nDid you mean 'make_date'?\n | -|map_from_arrays |SELECT 
map_from_arrays(array(1.0, 3.0), array('2', '4')); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: SQL error: ParserError("Expected SELECT, VALUES, or a subquery in the query body, found: 1.0")\n | -|map_from_entries |SELECT map_from_entries(array(struct(1, 'a'), struct(2, 'b'))); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: SQL error: ParserError("Expected SELECT, VALUES, or a subquery in the query body, found: struct")\n | -|map_keys |SELECT map_keys(map(1, 'a', 2, 'b')); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'map_keys'.\nDid you mean 'make_list'?\n | -|map_values |SELECT map_values(map(1, 'a', 2, 'b')); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'map_values'.\nDid you mean 'LAST_VALUE'?\n | -|map_zip_with |SELECT map_zip_with(map(1, 'a', 2, 'b'), map(1, 'x', 2, 'y'), (k, v1, v2) -> concat(v1, v2)); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'map_zip_with'.\nDid you mean 'starts_with'?\n | -|mask |SELECT mask('abcd-EFGH-8765-4321'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'mask'.\nDid you mean 'MAX'?\n | -|max |SELECT max(col) FROM VALUES (10), (50), (20) AS tab(col); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback | | -|max_by |SELECT max_by(x, y) FROM VALUES (('a', 10)), (('b', 50)), (('c', 20)) AS tab(x, y); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_out: DataFusion CLI v37.1.0\nError: Error during planning: Invalid function 'max_by'.\nDid you mean 'MAX'?\n | -|md5 |SELECT md5('Spark'); |PASSED |OK |OK | -|mean |SELECT 
mean(col) FROM VALUES (1), (2), (3) AS tab(col); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback | | -|median |SELECT median(col) FROM VALUES (0), (10) AS tab(col); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback | | -|min |SELECT min(col) FROM VALUES (10), (-1), (20) AS tab(col); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback | | -|min_by |SELECT min_by(x, y) FROM VALUES (('a', 10)), (('b', 50)), (('c', 20)) AS tab(x, y); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_out: DataFusion CLI v37.1.0\nError: Error during planning: Invalid function 'min_by'.\nDid you mean 'MIN'?\n | -|minute |SELECT minute('2009-07-30 12:58:59'); |FAILED |Failed on native side: found CometNativeException |std_err: Error during planning: Invalid function 'minute'.\nDid you mean 'instr'?\n | -|mod |SELECT 2 % 1.8; |PASSED |OK |OK | -|mode |SELECT mode(col) FROM VALUES (0), (10), (10) AS tab(col); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_out: DataFusion CLI v37.1.0\nError: Error during planning: Invalid function 'mode'.\nDid you mean 'md5'?\n | -|monotonically_increasing_id|SELECT monotonically_increasing_id(); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'monotonically_increasing_id'.\nDid you mean 'array_intersect'?\n | -|month |SELECT month('2016-07-30'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'month'.\nDid you mean 'tanh'?\n | -|months_between |SELECT months_between('1997-02-28 10:30:00', '1996-10-30'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'months_between'.\nDid you mean 'NTH_VALUE'?\n | -|named_struct |SELECT 
named_struct("a", 1, "b", 2, "c", 3); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Schema error: No field named a.\n | -|nanvl |SELECT nanvl(cast('NaN' as double), 123); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback | | -|negative |SELECT negative(1); |PASSED |OK |std_err: Error during planning: Invalid function 'negative'.\nDid you mean 'nanvl'?\n | -|next_day |SELECT next_day('2015-01-14', 'TU'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'next_day'.\nDid you mean 'today'?\n | -|not |SELECT not true; |PASSED |OK |OK | -|now |SELECT now(); |FAILED |\nResults do not match for query:\nTimezone: sun.util.calendar.ZoneInfo[id="America/Los_Angeles",offset=-28800000,dstSavings=3600000,useDaylight=true,transitions=185,lastRule=java.util.SimpleTimeZone[id=America/Los_Angeles,offset=-28800000,dstSavings=3600000,useDaylight=true,startYear=0,startMode=3,startMonth=2,startDay=8,startDayOfWeek=1,startTime=7200000,startTimeMode=0,endMode=3,endMonth=10,endDay=1,endDayOfWeek=1,endTime=7200000,endTimeMode=0]]\nTimezone Env: \n\n== Parsed Logical Plan ==\nProject [now() AS now()#4526, x#4510]\n+- SubqueryAlias tbl\n +- View (`tbl`, [x#4510])\n +- Relation [x#4510] parquet\n\n== Analyzed Logical Plan ==\nnow(): timestamp, x: string\nProject [now() AS now()#4526, x#4510]\n+- SubqueryAlias tbl\n +- View (`tbl`, [x#4510])\n +- Relation [x#4510] parquet\n\n== Optimized Logical Plan ==\nProject [2024-05-10 10:13:29.302484 AS now()#4526, x#4510]\n+- Relation [x#4510] parquet\n\n== Physical Plan ==\n*(1) ColumnarToRow\n+- CometProject [now()#4526, x#4510], [2024-05-10 10:13:29.302484 AS now()#4526, x#4510]\n +- CometScan parquet [x#4510] Batched: true, DataFilters: [], Format: CometParquet, Location: InMemoryFileIndex(1 paths)[file:/private/var/folders/6f/_s1vnnd55zgfkx7zlwnrnv0h0000gn/T/spark-28..., 
PartitionFilters: [], PushedFilters: [], ReadSchema: struct\n\n== Results ==\n\n== Results ==\n!== Correct Answer - 1 == == Spark Answer - 1 ==\n struct struct\n![2024-05-10 10:13:29.285019,dummy] [2024-05-10 10:13:29.302484,dummy]\n \n | | -|nth_value |SELECT a, b, nth_value(b, 2) OVER (PARTITION BY a ORDER BY b) FROM VALUES ('A1', 2), ('A1', 1), ('A2', 3), ('A1', 1) tab(a, b); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback | | -|ntile |SELECT a, b, ntile(2) OVER (PARTITION BY a ORDER BY b) FROM VALUES ('A1', 2), ('A1', 1), ('A2', 3), ('A1', 1) tab(a, b); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback | | -|nullif |SELECT nullif(2, 2); |PASSED |OK |OK | -|nvl |SELECT nvl(NULL, array('2')); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: SQL error: ParserError("Expected SELECT, VALUES, or a subquery in the query body, found: '2'")\n | -|nvl2 |SELECT nvl2(NULL, 2, 1); |PASSED |OK |OK | -|octet_length |SELECT octet_length('Spark SQL'); |PASSED |OK |OK | -|or |SELECT true or false; |PASSED |OK |OK | -|overlay |SELECT overlay('Spark SQL' PLACING '_' FROM 6); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback | | -|parse_url |SELECT parse_url('http://spark.apache.org/path?query=1', 'HOST'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'parse_url'.\nDid you mean 'date_part'?\n | -|percent_rank |SELECT a, b, percent_rank(b) OVER (PARTITION BY a ORDER BY b) FROM VALUES ('A1', 2), ('A1', 1), ('A2', 3), ('A1', 1) tab(a, b); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_out: DataFusion CLI v37.1.0\nError: Error during planning: No function matches the given name and argument types 'PERCENT_RANK(Int32)'. 
You might need to add explicit type casts.\n\tCandidate functions:\n\tPERCENT_RANK()\n | -|percentile |SELECT percentile(col, 0.3) FROM VALUES (0), (10) AS tab(col); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_out: DataFusion CLI v37.1.0\nError: Error during planning: Invalid function 'percentile'.\nDid you mean 'current_time'?\n | -|percentile_approx |SELECT percentile_approx(col, array(0.5, 0.4, 0.1), 100) FROM VALUES (0), (1), (2), (10) AS tab(col); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_out: DataFusion CLI v37.1.0\nError: SQL error: ParserError("Expected SELECT, VALUES, or a subquery in the query body, found: 0.5")\n | -|pi |SELECT pi(); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback | | -|pmod |SELECT pmod(10, 3); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'pmod'.\nDid you mean 'pow'?\n | -|posexplode |SELECT posexplode(array(10,20)); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: SQL error: ParserError("Expected SELECT, VALUES, or a subquery in the query body, found: 10")\n | -|posexplode_outer |SELECT posexplode_outer(array(10,20)); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: SQL error: ParserError("Expected SELECT, VALUES, or a subquery in the query body, found: 10")\n | -|position |SELECT position('bar', 'foobarbar'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: SQL error: ParserError("Position function must include IN keyword")\n | -|positive |SELECT positive(1); |PASSED |OK |std_err: Error during planning: Invalid function 'positive'.\nDid you mean 'position'?\n | -|pow |SELECT pow(2, 3); |PASSED |OK |OK | -|power |SELECT power(2, 3); |PASSED |OK |OK | -|printf |SELECT printf("Hello World %d 
%s", 100, "days"); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'printf'.\nDid you mean 'asinh'?\n | -|quarter |SELECT quarter('2016-08-31'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'quarter'.\nDid you mean 'flatten'?\n | -|radians |SELECT radians(180); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback | | -|raise_error |SELECT raise_error('custom error message'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'raise_error'.\nDid you mean 'make_array'?\n | -|rand |SELECT rand(); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'rand'.\nDid you mean 'tan'?\n | -|randn |SELECT randn(); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'randn'.\nDid you mean 'random'?\n | -|random |SELECT random(); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback | | -|rank |SELECT a, b, rank(b) OVER (PARTITION BY a ORDER BY b) FROM VALUES ('A1', 2), ('A1', 1), ('A2', 3), ('A1', 1) tab(a, b); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_out: DataFusion CLI v37.1.0\nError: Error during planning: No function matches the given name and argument types 'RANK(Int32)'. 
You might need to add explicit type casts.\n\tCandidate functions:\n\tRANK()\n | -|reduce |SELECT reduce(array(1, 2, 3), 0, (acc, x) -> acc + x); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: SQL error: ParserError("Expected SELECT, VALUES, or a subquery in the query body, found: 1")\n | -|reflect |SELECT reflect('java.util.UUID', 'randomUUID'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'reflect'.\nDid you mean 'replace'?\n | -|regexp |SELECT regexp('%SystemDrive%\Users\John', '%SystemDrive%\\Users.*'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: SQL error: TokenizerError("unsupported escape char: '\\U'")\n | -|regexp_count |SELECT regexp_count('Steven Jones and Stephen Smith are the best players', 'Ste(v|ph)en'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'regexp_count'.\nDid you mean 'REGR_COUNT'?\n | -|regexp_extract |SELECT regexp_extract('100-200', '(\\d+)-(\\d+)', 1); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'regexp_extract'.\nDid you mean 'regexp_match'?\n | -|regexp_extract_all |SELECT regexp_extract_all('100-200, 300-400', '(\\d+)-(\\d+)', 1); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'regexp_extract_all'.\nDid you mean 'regexp_match'?\n | -|regexp_instr |SELECT regexp_instr('user@spark.apache.org', '@[^.]*'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'regexp_instr'.\nDid you mean 'regexp_like'?\n | -|regexp_like |SELECT regexp_like('%SystemDrive%\Users\John', '%SystemDrive%\\Users.*'); |FAILED |Unsupported: 
Expected only Comet native operators but found Spark fallback |std_err: SQL error: TokenizerError("unsupported escape char: '\\U'")\n | -|regexp_replace |SELECT regexp_replace('100-200', '(\\d+)', 'num'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback | | -|regexp_substr |SELECT regexp_substr('Steven Jones and Stephen Smith are the best players', 'Ste(v|ph)en'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'regexp_substr'.\nDid you mean 'regexp_like'?\n | -|regr_avgx |SELECT regr_avgx(y, x) FROM VALUES (1, 2), (2, 2), (2, 3), (2, 4) AS tab(y, x); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback | | -|regr_avgy |SELECT regr_avgy(y, x) FROM VALUES (1, 2), (2, 2), (2, 3), (2, 4) AS tab(y, x); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback | | -|regr_count |SELECT regr_count(y, x) FROM VALUES (1, 2), (2, 2), (2, 3), (2, 4) AS tab(y, x); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback | | -|regr_intercept |SELECT regr_intercept(y, x) FROM VALUES (1,1), (2,2), (3,3) AS tab(y, x); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback | | -|regr_r2 |SELECT regr_r2(y, x) FROM VALUES (1, 2), (2, 2), (2, 3), (2, 4) AS tab(y, x); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback | | -|regr_slope |SELECT regr_slope(y, x) FROM VALUES (1,1), (2,2), (3,3) AS tab(y, x); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback | | -|regr_sxx |SELECT regr_sxx(y, x) FROM VALUES (1, 2), (2, 2), (2, 3), (2, 4) AS tab(y, x); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback | | -|regr_sxy |SELECT regr_sxy(y, x) FROM VALUES (1, 2), (2, 2), (2, 3), (2, 4) AS tab(y, x); |FAILED |Unsupported: Expected only Comet native operators but found 
Spark fallback | | -|regr_syy |SELECT regr_syy(y, x) FROM VALUES (1, 2), (2, 2), (2, 3), (2, 4) AS tab(y, x); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback | | -|repeat |SELECT repeat('123', 2); |PASSED |OK |OK | -|replace |SELECT replace('ABCabc', 'abc', 'DEF'); |PASSED |OK |OK | -|reverse |SELECT reverse('Spark SQL'); |PASSED |OK |OK | -|right |SELECT right('Spark SQL', 3); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback | | -|rint |SELECT rint(12.3456); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'rint'.\nDid you mean 'sin'?\n | -|rlike |SELECT rlike('%SystemDrive%\Users\John', '%SystemDrive%\\Users.*'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: SQL error: TokenizerError("unsupported escape char: '\\U'")\n | -|round |SELECT round(2.5, 0); |PASSED |OK |OK | -|row_number |SELECT a, b, row_number() OVER (PARTITION BY a ORDER BY b) FROM VALUES ('A1', 2), ('A1', 1), ('A2', 3), ('A1', 1) tab(a, b); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback | | -|rpad |SELECT rpad('hi', 5, '??'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback | | -|rtrim |SELECT rtrim(' SparkSQL '); |PASSED |OK |OK | -|schema_of_csv |SELECT schema_of_csv('1,abc'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'schema_of_csv'.\nDid you mean 'concat_ws'?\n | -|schema_of_json |SELECT schema_of_json('[{"col":0}]'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'schema_of_json'.\nDid you mean 'concat_ws'?\n | -|sec |SELECT sec(0); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: 
Invalid function 'sec'.\nDid you mean 'sin'?\n | -|second |SELECT second('2009-07-30 12:58:59'); |FAILED |Failed on native side: found CometNativeException |std_err: Error during planning: Invalid function 'second'.\nDid you mean 'decode'?\n | -|sentences |SELECT sentences('Hi there! Good morning.'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'sentences'.\nDid you mean 'degrees'?\n | -|sequence |SELECT sequence(1, 5); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'sequence'.\nDid you mean 'coalesce'?\n | -|sha |SELECT sha('Spark'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'sha'.\nDid you mean 'chr'?\n | -|sha1 |SELECT sha1('Spark'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'sha1'.\nDid you mean 'sha512'?\n | -|sha2 |SELECT sha2('Spark', 256); |PASSED |OK |std_err: Error during planning: Invalid function 'sha2'.\nDid you mean 'sha224'?\n | -|shiftleft |SELECT shiftleft(2, 1); |PASSED |OK |std_err: Error during planning: Invalid function 'shiftleft'.\nDid you mean 'left'?\n | -|shiftright |SELECT shiftright(4, 1); |PASSED |OK |std_err: Error during planning: Invalid function 'shiftright'.\nDid you mean 'right'?\n | -|shiftrightunsigned |SELECT shiftrightunsigned(4, 1); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'shiftrightunsigned'.\nDid you mean 'list_union'?\n | -|shuffle |SELECT shuffle(array(1, 20, 3, 5)); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: SQL error: ParserError("Expected SELECT, VALUES, or a subquery in the query body, found: 1")\n | -|sign |SELECT 
sign(40); |PASSED |OK |std_err: Error during planning: Invalid function 'sign'.\nDid you mean 'sin'?\n | -|signum |SELECT signum(40); |PASSED |OK |OK | -|sin |SELECT sin(0); |PASSED |OK |OK | -|sinh |SELECT sinh(0); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback | | -|size |SELECT size(array('b', 'd', 'c', 'a')); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: SQL error: ParserError("Expected SELECT, VALUES, or a subquery in the query body, found: 'b'")\n | -|skewness |SELECT skewness(col) FROM VALUES (-10), (-20), (100), (1000) AS tab(col); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_out: DataFusion CLI v37.1.0\nError: Error during planning: Invalid function 'skewness'.\nDid you mean 'degrees'?\n | -|slice |SELECT slice(array(1, 2, 3, 4), 2, 2); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: SQL error: ParserError("Expected SELECT, VALUES, or a subquery in the query body, found: 1")\n | -|smallint | |SKIPPED|No examples found in spark.sessionState.functionRegistry | | -|some |SELECT some(col) FROM VALUES (true), (false), (false) AS tab(col); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_out: DataFusion CLI v37.1.0\nError: Error during planning: Invalid function 'some'.\nDid you mean 'SUM'?\n | -|sort_array |SELECT sort_array(array('b', 'd', null, 'c', 'a'), true); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: SQL error: ParserError("Expected SELECT, VALUES, or a subquery in the query body, found: 'b'")\n | -|soundex |SELECT soundex('Miller'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'soundex'.\nDid you mean 'round'?\n | -|space |SELECT concat(space(2), '1'); |FAILED |Unsupported: Expected only Comet native 
operators but found Spark fallback |std_err: Error during planning: Invalid function 'space'.\nDid you mean 'lpad'?\n | -|spark_partition_id |SELECT spark_partition_id(); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'spark_partition_id'.\nDid you mean 'list_position'?\n | -|split |SELECT split('oneAtwoBthreeC', '[ABC]'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'split'.\nDid you mean 'sqrt'?\n | -|split_part |SELECT split_part('11.12.13', '.', 3); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback | | -|sqrt |SELECT sqrt(4); |PASSED |OK |OK | -|stack |SELECT stack(2, 1, 2, 3); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'stack'.\nDid you mean 'atanh'?\n | -|startswith |SELECT startswith('Spark SQL', 'Spark'); |FAILED |Failed on native side: found CometNativeException |std_err: Error during planning: Invalid function 'startswith'.\nDid you mean 'starts_with'?\n | -|std |SELECT std(col) FROM VALUES (1), (2), (3) AS tab(col); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_out: DataFusion CLI v37.1.0\nError: Error during planning: Invalid function 'std'.\nDid you mean 'gcd'?\n | -|stddev |SELECT stddev(col) FROM VALUES (1), (2), (3) AS tab(col); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback | | -|stddev_pop |SELECT stddev_pop(col) FROM VALUES (1), (2), (3) AS tab(col); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback | | -|stddev_samp |SELECT stddev_samp(col) FROM VALUES (1), (2), (3) AS tab(col); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback | | -|str_to_map |SELECT str_to_map('a:1,b:2,c:3', ',', ':'); |FAILED 
|Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'str_to_map'.\nDid you mean 'strpos'?\n | -|string | |SKIPPED|No examples found in spark.sessionState.functionRegistry | | -|struct |SELECT struct(1, 2, 3); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback | | -|substr |SELECT substr('Spark SQL', 5); |FAILED |Failed on native side: found CometNativeException | | -|substring |SELECT substring('Spark SQL', 5); |FAILED |Failed on native side: found CometNativeException |std_err: This feature is not implemented: Unsupported ast node in sqltorel: Substring { expr: Value(SingleQuotedString("Spark SQL")), substring_from: Some(Value(Number("5", false))), substring_for: None, special: true }\n | -|substring_index |SELECT substring_index('www.apache.org', '.', 2); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback | | -|sum |SELECT sum(col) FROM VALUES (5), (10), (15) AS tab(col); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback | | -|tan |SELECT tan(0); |PASSED |OK |OK | -|tanh |SELECT tanh(0); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback | | -|timestamp | |SKIPPED|No examples found in spark.sessionState.functionRegistry | | -|timestamp_micros |SELECT timestamp_micros(1230219000123123); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'timestamp_micros'.\nDid you mean 'to_timestamp_micros'?\n | -|timestamp_millis |SELECT timestamp_millis(1230219000123); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'timestamp_millis'.\nDid you mean 'to_timestamp_millis'?\n | -|timestamp_seconds |SELECT timestamp_seconds(1230219000); |FAILED |Unsupported: Expected only Comet native operators but found Spark 
fallback |std_err: Error during planning: Invalid function 'timestamp_seconds'.\nDid you mean 'to_timestamp_seconds'?\n | -|tinyint | |SKIPPED|No examples found in spark.sessionState.functionRegistry | | -|to_binary |SELECT to_binary('abc', 'utf-8'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'to_binary'.\nDid you mean 'to_char'?\n | -|to_char |SELECT to_char(454, '999'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: No function matches the given name and argument types 'to_char(Int64, Utf8)'. You might need to add explicit type casts.\n\tCandidate functions:\n\tto_char(Date32, Utf8)\n\tto_char(Date64, Utf8)\n\tto_char(Time32(Millisecond), Utf8)\n\tto_char(Time32(Second), Utf8)\n\tto_char(Time64(Microsecond), Utf8)\n\tto_char(Time64(Nanosecond), Utf8)\n\tto_char(Timestamp(Second, None), Utf8)\n\tto_char(Timestamp(Second, Some("+TZ")), Utf8)\n\tto_char(Timestamp(Millisecond, None), Utf8)\n\tto_char(Timestamp(Millisecond, Some("+TZ")), Utf8)\n\tto_char(Timestamp(Microsecond, None), Utf8)\n\tto_char(Timestamp(Microsecond, Some("+TZ")), Utf8)\n\tto_char(Timestamp(Nanosecond, None), Utf8)\n\tto_char(Timestamp(Nanosecond, Some("+TZ")), Utf8)\n\tto_char(Duration(Second), Utf8)\n\tto_char(Duration(Millisecond), Utf8)\n\tto_char(Duration(Microsecond), Utf8)\n\tto_char(Duration(Nanosecond), Utf8)\n| -|to_csv |SELECT to_csv(named_struct('a', 1, 'b', 2)); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'to_csv'.\nDid you mean 'to_hex'?\n | -|to_date |SELECT to_date('2009-07-30 04:17:52'); |PASSED |OK |OK | -|to_json |SELECT to_json(named_struct('a', 1, 'b', 2)); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'to_json'.\nDid you mean 'to_hex'?\n | 
-|to_number |SELECT to_number('454', '999'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'to_number'.\nDid you mean 'to_char'?\n | -|to_timestamp |SELECT to_timestamp('2016-12-31 00:12:00'); |PASSED |OK |OK | -|to_timestamp_ltz |SELECT to_timestamp_ltz('2016-12-31 00:12:00'); |PASSED |OK |std_err: Error during planning: Invalid function 'to_timestamp_ltz'.\nDid you mean 'to_timestamp'?\n | -|to_timestamp_ntz |SELECT to_timestamp_ntz('2016-12-31 00:12:00'); |FAILED |Failed on native side: found CometNativeException |std_err: Error during planning: Invalid function 'to_timestamp_ntz'.\nDid you mean 'to_timestamp_nanos'?\n | -|to_unix_timestamp |SELECT to_unix_timestamp('2016-04-08', 'yyyy-MM-dd'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'to_unix_timestamp'.\nDid you mean 'to_timestamp'?\n | -|to_utc_timestamp |SELECT to_utc_timestamp('2016-08-31', 'Asia/Seoul'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'to_utc_timestamp'.\nDid you mean 'to_timestamp'?\n | -|transform |SELECT transform(array(1, 2, 3), x -> x + 1); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: SQL error: ParserError("Expected SELECT, VALUES, or a subquery in the query body, found: 1")\n | -|transform_keys |SELECT transform_keys(map_from_arrays(array(1, 2, 3), array(1, 2, 3)), (k, v) -> k + 1); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: SQL error: ParserError("Expected SELECT, VALUES, or a subquery in the query body, found: 1")\n | -|transform_values |SELECT transform_values(map_from_arrays(array(1, 2, 3), array(1, 2, 3)), (k, v) -> v + 1); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: 
SQL error: ParserError("Expected SELECT, VALUES, or a subquery in the query body, found: 1")\n | -|translate |SELECT translate('AaBbCc', 'abc', '123'); |PASSED |OK |OK | -|trim |SELECT trim(' SparkSQL '); |PASSED |OK |OK | -|trunc |SELECT trunc('2019-08-04', 'week'); |FAILED |Failed on native side: found CometNativeException |std_err: Error during planning: No function matches the given name and argument types 'trunc(Utf8, Utf8)'. You might need to add explicit type casts.\n\tCandidate functions:\n\ttrunc(Float32, Int64)\n\ttrunc(Float64, Int64)\n\ttrunc(Float64)\n\ttrunc(Float32)\n | -|try_add |SELECT try_add(1, 2); |PASSED |OK |std_err: Error during planning: Invalid function 'try_add'.\nDid you mean 'rpad'?\n | -|try_avg |SELECT try_avg(col) FROM VALUES (1), (2), (3) AS tab(col); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_out: DataFusion CLI v37.1.0\nError: Error during planning: Invalid function 'try_avg'.\nDid you mean 'AVG'?\n | -|try_divide |SELECT try_divide(3, 2); |PASSED |OK |std_err: Error during planning: Invalid function 'try_divide'.\nDid you mean 'to_date'?\n | -|try_element_at |SELECT try_element_at(array(1, 2, 3), 2); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: SQL error: ParserError("Expected SELECT, VALUES, or a subquery in the query body, found: 1")\n | -|try_multiply |SELECT try_multiply(2, 3); |PASSED |OK |std_err: Error during planning: Invalid function 'try_multiply'.\nDid you mean 'array_union'?\n | -|try_subtract |SELECT try_subtract(2, 1); |PASSED |OK |std_err: Error during planning: Invalid function 'try_subtract'.\nDid you mean 'array_extract'?\n | -|try_sum |SELECT try_sum(col) FROM VALUES (5), (10), (15) AS tab(col); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_out: DataFusion CLI v37.1.0\nError: Error during planning: Invalid function 'try_sum'.\nDid you mean 'trim'?\n | -|try_to_binary |SELECT 
try_to_binary('abc', 'utf-8'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'try_to_binary'.\nDid you mean 'string_to_array'?\n | -|try_to_number |SELECT try_to_number('454', '999'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'try_to_number'.\nDid you mean 'array_to_string'?\n | -|try_to_timestamp |SELECT try_to_timestamp('2016-12-31 00:12:00'); |PASSED |OK |std_err: Error during planning: Invalid function 'try_to_timestamp'.\nDid you mean 'to_timestamp'?\n | -|typeof |SELECT typeof(1); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'typeof'.\nDid you mean 'repeat'?\n | -|ucase |SELECT ucase('SparkSql'); |PASSED |OK |std_err: Error during planning: Invalid function 'ucase'.\nDid you mean 'acos'?\n | -|unbase64 |SELECT unbase64('U3BhcmsgU1FM'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'unbase64'.\nDid you mean 'sha256'?\n | -|unhex |SELECT decode(unhex('537061726B2053514C'), 'UTF-8'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'unhex'.\nDid you mean 'upper'?\n | -|unix_date |SELECT unix_date(DATE("1970-01-02")); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'unix_date'.\nDid you mean 'to_date'?\n | -|unix_micros |SELECT unix_micros(TIMESTAMP('1970-01-01 00:00:01Z')); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'unix_micros'.\nDid you mean 'initcap'?\n | -|unix_millis |SELECT unix_millis(TIMESTAMP('1970-01-01 00:00:01Z')); |FAILED |Unsupported: 
Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'unix_millis'.\nDid you mean 'nullif'?\n | -|unix_seconds |SELECT unix_seconds(TIMESTAMP('1970-01-01 00:00:01Z')); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'unix_seconds'.\nDid you mean 'decode'?\n | -|unix_timestamp |SELECT unix_timestamp(); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'unix_timestamp'.\nDid you mean 'to_timestamp'?\n | -|upper |SELECT upper('SparkSql'); |PASSED |OK |OK | -|url_decode |SELECT url_decode('https%3A%2F%2Fspark.apache.org'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'url_decode'.\nDid you mean 'decode'?\n | -|url_encode |SELECT url_encode('https://spark.apache.org'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'url_encode'.\nDid you mean 'encode'?\n | -|user |SELECT user(); |PASSED |OK |std_err: SQL error: ParserError("Expected end of statement, found: (")\n | -|uuid |SELECT uuid(); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback | | -|var_pop |SELECT var_pop(col) FROM VALUES (1), (2), (3) AS tab(col); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback | | -|var_samp |SELECT var_samp(col) FROM VALUES (1), (2), (3) AS tab(col); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback | | -|variance |SELECT variance(col) FROM VALUES (1), (2), (3) AS tab(col); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_out: DataFusion CLI v37.1.0\nError: Error during planning: Invalid function 'variance'.\nDid you mean 'range'?\n | -|version 
|SELECT version(); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'version'.\nDid you mean 'sin'?\n | -|weekday |SELECT weekday('2009-07-30'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'weekday'.\nDid you mean 'today'?\n | -|weekofyear |SELECT weekofyear('2008-02-20'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'weekofyear'.\nDid you mean 'repeat'?\n | -|when |SELECT CASE WHEN 1 > 0 THEN 1 WHEN 2 > 0 THEN 2.0 ELSE 1.2 END; |PASSED |OK |OK | -|width_bucket |SELECT width_bucket(5.3, 0.2, 10.6, 5); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'width_bucket'.\nDid you mean 'list_cat'?\n | -|xpath |SELECT xpath('b1b2b3c1c2','a/b/text()'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'xpath'.\nDid you mean 'tanh'?\n | -|xpath_boolean |SELECT xpath_boolean('1','a/b'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'xpath_boolean'.\nDid you mean 'date_format'?\n | -|xpath_double |SELECT xpath_double('12', 'sum(a/b)'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'xpath_double'.\nDid you mean 'date_format'?\n | -|xpath_float |SELECT xpath_float('12', 'sum(a/b)'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'xpath_float'.\nDid you mean 'date_format'?\n | -|xpath_int |SELECT xpath_int('12', 'sum(a/b)'); |FAILED |Unsupported: Expected only Comet native operators but found 
Spark fallback |std_err: Error during planning: Invalid function 'xpath_int'.\nDid you mean 'date_bin'?\n | -|xpath_long |SELECT xpath_long('12', 'sum(a/b)'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'xpath_long'.\nDid you mean 'date_bin'?\n | -|xpath_number |SELECT xpath_number('12', 'sum(a/b)'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'xpath_number'.\nDid you mean 'date_bin'?\n | -|xpath_short |SELECT xpath_short('12', 'sum(a/b)'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'xpath_short'.\nDid you mean 'list_sort'?\n | -|xpath_string |SELECT xpath_string('bcc','a/c'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'xpath_string'.\nDid you mean 'date_trunc'?\n | -|xxhash64 |SELECT xxhash64('Spark', array(123), 2); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: SQL error: ParserError("Expected SELECT, VALUES, or a subquery in the query body, found: 123")\n | -|year |SELECT year('2016-07-30'); |PASSED |OK |std_err: Error during planning: Invalid function 'year'.\nDid you mean 'VAR'?\n | -|zip_with |SELECT zip_with(array(1, 2, 3), array('a', 'b', 'c'), (x, y) -> (y, x)); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: SQL error: ParserError("Expected SELECT, VALUES, or a subquery in the query body, found: 1")\n | -|| |SELECT 3 | 5; |PASSED |OK |OK | -|~ |SELECT ~ 0; |FAILED |Failed on native side: found CometNativeException |std_err: SQL error: ParserError("Expected an expression:, found: ~")\n | 
-+---------------------------+-------------------------------------------------------------------------------------------------------------------------------------------------------+-------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++---------------------------+-------------------------------------------------------------------------------------------------------------------------------------------------------+-------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-------------------------
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +|name |query |result |cometMessage |datafusionMessage | 
++---------------------------+-------------------------------------------------------------------------------------------------------------------------------------------------------+-------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +|! |SELECT ! true; |PASSED |OK |std_err: SQL error: ParserError("Expected an expression:, found: !")\n | +|% |SELECT 2 % 1.8; |PASSED |OK |OK | +|& |SELECT 3 & 5; |PASSED |OK |OK | +|* |SELECT 2 * 3; |PASSED |OK |OK | +|+ |SELECT 1 + 2; |PASSED |OK |OK | +|- |SELECT 2 - 1; |PASSED |OK |OK | +|/ |SELECT 3 / 2; |PASSED |OK |OK | +|< |SELECT 1 < 2; |PASSED |OK |OK | +|<= |SELECT 2 <= 2; |PASSED |OK |OK | +|<=> |SELECT 2 <=> 2; |PASSED |OK |std_err: This feature is not implemented: Unsupported SQL binary operator Spaceship\n | +|= |SELECT 2 = 2; |PASSED |OK |OK | +|== |SELECT 2 == 2; |PASSED |OK |OK | +|> |SELECT 2 > 1; |PASSED |OK |OK | +|>= |SELECT 2 >= 1; |PASSED |OK |OK | +|^ |SELECT 3 ^ 5; |PASSED |OK |OK | +|abs |SELECT abs(-1); |PASSED |OK |OK | +|acos |SELECT acos(1); |PASSED |OK |OK | +|acosh |SELECT acosh(1); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback | | +|add_months |SELECT add_months('2016-08-31', 1); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'add_months'.\nDid you mean 'radians'?\n | 
+|aes_decrypt |SELECT aes_decrypt(unhex('83F16B2AA704794132802D248E6BFD4E380078182D1544813898AC97E709B28A94'), '0000111122223333'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'aes_decrypt'.\nDid you mean 'list_except'?\n | +|aes_encrypt |SELECT hex(aes_encrypt('Spark', '0000111122223333')); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'hex'.\nDid you mean 'exp'?\n | +|aggregate |SELECT aggregate(array(1, 2, 3), 0, (acc, x) -> acc + x); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: SQL error: ParserError("Expected SELECT, VALUES, or a subquery in the query body, found: 1")\n | +|and |SELECT true and true; |PASSED |OK |OK | +|any |SELECT any(col) FROM VALUES (true), (false), (false) AS tab(col); |PASSED |OK |std_out: DataFusion CLI v38.0.0\nError: Error during planning: Invalid function 'any'.\nDid you mean 'ln'?\n | +|any_value |SELECT any_value(col) FROM VALUES (10), (5), (20) AS tab(col); |PASSED |OK |std_out: DataFusion CLI v38.0.0\nError: Error during planning: Invalid function 'any_value'.\nDid you mean 'LAST_VALUE'?\n | +|approx_count_distinct |SELECT approx_count_distinct(col1) FROM VALUES (1), (1), (2), (2), (3) tab(col1); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_out: DataFusion CLI v38.0.0\nError: Error during planning: Invalid function 'approx_count_distinct'.\nDid you mean 'APPROX_DISTINCT'?\n | +|approx_percentile |SELECT approx_percentile(col, array(0.5, 0.4, 0.1), 100) FROM VALUES (0), (1), (2), (10) AS tab(col); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_out: DataFusion CLI v38.0.0\nError: SQL error: ParserError("Expected SELECT, VALUES, or a subquery in the query body, found: 0.5")\n | +|array |SELECT array(1, 2, 3); |FAILED 
|Unsupported: Expected only Comet native operators but found Spark fallback |std_err: SQL error: ParserError("Expected SELECT, VALUES, or a subquery in the query body, found: 1")\n | +|array_agg |SELECT array_agg(col) FROM VALUES (1), (2), (1) AS tab(col); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback | | +|array_append |SELECT array_append(array('b', 'd', 'c', 'a'), 'd'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: SQL error: ParserError("Expected SELECT, VALUES, or a subquery in the query body, found: 'b'")\n | +|array_compact |SELECT array_compact(array(1, 2, 3, null)); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: SQL error: ParserError("Expected SELECT, VALUES, or a subquery in the query body, found: 1")\n | +|array_contains |SELECT array_contains(array(1, 2, 3), 2); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: SQL error: ParserError("Expected SELECT, VALUES, or a subquery in the query body, found: 1")\n | +|array_distinct |SELECT array_distinct(array(1, 2, 3, null, 3)); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: SQL error: ParserError("Expected SELECT, VALUES, or a subquery in the query body, found: 1")\n | +|array_except |SELECT array_except(array(1, 2, 3), array(1, 3, 5)); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: SQL error: ParserError("Expected SELECT, VALUES, or a subquery in the query body, found: 1")\n | +|array_insert |SELECT array_insert(array(1, 2, 3, 4), 5, 5); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: SQL error: ParserError("Expected SELECT, VALUES, or a subquery in the query body, found: 1")\n | +|array_intersect |SELECT array_intersect(array(1, 2, 3), array(1, 3, 5)); |FAILED |Unsupported: Expected only Comet 
native operators but found Spark fallback |std_err: SQL error: ParserError("Expected SELECT, VALUES, or a subquery in the query body, found: 1")\n | +|array_join |SELECT array_join(array('hello', 'world'), ' '); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: SQL error: ParserError("Expected SELECT, VALUES, or a subquery in the query body, found: 'hello'")\n | +|array_max |SELECT array_max(array(1, 20, null, 3)); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: SQL error: ParserError("Expected SELECT, VALUES, or a subquery in the query body, found: 1")\n | +|array_min |SELECT array_min(array(1, 20, null, 3)); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: SQL error: ParserError("Expected SELECT, VALUES, or a subquery in the query body, found: 1")\n | +|array_position |SELECT array_position(array(3, 2, 1), 1); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: SQL error: ParserError("Expected SELECT, VALUES, or a subquery in the query body, found: 3")\n | +|array_remove |SELECT array_remove(array(1, 2, 3, null, 3), 3); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: SQL error: ParserError("Expected SELECT, VALUES, or a subquery in the query body, found: 1")\n | +|array_repeat |SELECT array_repeat('123', 2); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback | | +|array_size |SELECT array_size(array('b', 'd', 'c', 'a')); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: SQL error: ParserError("Expected SELECT, VALUES, or a subquery in the query body, found: 'b'")\n | +|array_sort |SELECT array_sort(array(5, 6, 1), (left, right) -> case when left < right then -1 when left > right then 1 else 0 end); |FAILED |Unsupported: Expected only Comet native operators but found 
Spark fallback |std_err: SQL error: ParserError("Expected SELECT, VALUES, or a subquery in the query body, found: 5")\n | +|array_union |SELECT array_union(array(1, 2, 3), array(1, 3, 5)); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: SQL error: ParserError("Expected SELECT, VALUES, or a subquery in the query body, found: 1")\n | +|arrays_overlap |SELECT arrays_overlap(array(1, 2, 3), array(3, 4, 5)); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: SQL error: ParserError("Expected SELECT, VALUES, or a subquery in the query body, found: 1")\n | +|arrays_zip |SELECT arrays_zip(array(1, 2, 3), array(2, 3, 4)); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: SQL error: ParserError("Expected SELECT, VALUES, or a subquery in the query body, found: 1")\n | +|ascii |SELECT ascii('222'); |PASSED |OK |OK | +|asin |SELECT asin(0); |PASSED |OK |OK | +|asinh |SELECT asinh(0); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback | | +|assert_true |SELECT assert_true(0 < 1); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'assert_true'.\nDid you mean 'date_trunc'?\n | +|atan |SELECT atan(0); |PASSED |OK |OK | +|atan2 |SELECT atan2(0, 0); |PASSED |OK |OK | +|atanh |SELECT atanh(0); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback | | +|avg |SELECT avg(col) FROM VALUES (1), (2), (3) AS tab(col); |PASSED |OK |OK | +|base64 |SELECT base64('Spark SQL'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'base64'.\nDid you mean 'asinh'?\n | +|bigint | |SKIPPED|No examples found in spark.sessionState.functionRegistry | | +|bin |SELECT bin(13); |FAILED |Unsupported: Expected only Comet native operators but found Spark 
fallback |std_err: Error during planning: Invalid function 'bin'.\nDid you mean 'sin'?\n | +|binary | |SKIPPED|No examples found in spark.sessionState.functionRegistry | | +|bit_and |SELECT bit_and(col) FROM VALUES (3), (5) AS tab(col); |PASSED |OK |OK | +|bit_count |SELECT bit_count(0); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'bit_count'.\nDid you mean 'COUNT'?\n | +|bit_get |SELECT bit_get(11, 0); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'bit_get'.\nDid you mean 'BIT_AND'?\n | +|bit_length |SELECT bit_length('Spark SQL'); |PASSED |OK |OK | +|bit_or |SELECT bit_or(col) FROM VALUES (3), (5) AS tab(col); |PASSED |OK |OK | +|bit_xor |SELECT bit_xor(col) FROM VALUES (3), (5) AS tab(col); |PASSED |OK |OK | +|bool_and |SELECT bool_and(col) FROM VALUES (true), (true), (true) AS tab(col); |PASSED |OK |OK | +|bool_or |SELECT bool_or(col) FROM VALUES (true), (false), (false) AS tab(col); |PASSED |OK |OK | +|boolean | |SKIPPED|No examples found in spark.sessionState.functionRegistry | | +|bround |SELECT bround(2.5, 0); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'bround'.\nDid you mean 'round'?\n | +|btrim |SELECT btrim(' SparkSQL '); |PASSED |OK |OK | +|cardinality |SELECT cardinality(array('b', 'd', 'c', 'a')); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: SQL error: ParserError("Expected SELECT, VALUES, or a subquery in the query body, found: 'b'")\n | +|cast |SELECT cast('10' as int); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback | | +|cbrt |SELECT cbrt(27.0); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback | | +|ceil |SELECT ceil(-0.1); |PASSED |OK |OK | +|ceiling |SELECT 
ceiling(-0.1); |PASSED |OK |std_err: Error during planning: Invalid function 'ceiling'.\nDid you mean 'ceil'?\n | +|char |SELECT char(65); |PASSED |OK |std_err: Error during planning: Invalid function 'char'.\nDid you mean 'chr'?\n | +|char_length |SELECT char_length('Spark SQL '); |PASSED |OK |OK | +|character_length |SELECT character_length('Spark SQL '); |PASSED |OK |OK | +|chr |SELECT chr(65); |PASSED |OK |OK | +|coalesce |SELECT coalesce(NULL, 1, NULL); |PASSED |OK |OK | +|collect_list |SELECT collect_list(col) FROM VALUES (1), (2), (1) AS tab(col); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_out: DataFusion CLI v38.0.0\nError: Error during planning: Invalid function 'collect_list'.\nDid you mean 'make_list'?\n | +|collect_set |SELECT collect_set(col) FROM VALUES (1), (2), (1) AS tab(col); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_out: DataFusion CLI v38.0.0\nError: Error during planning: Invalid function 'collect_set'.\nDid you mean 'coalesce'?\n | +|concat |SELECT concat('Spark', 'SQL'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback | | +|concat_ws |SELECT concat_ws(' ', 'Spark', 'SQL'); |PASSED |OK |OK | +|contains |SELECT contains('Spark SQL', 'Spark'); |PASSED |OK |std_err: Error during planning: Invalid function 'contains'.\nDid you mean 'concat'?\n | +|conv |SELECT conv('100', 2, 10); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'conv'.\nDid you mean 'cos'?\n | +|convert_timezone |SELECT convert_timezone('Europe/Brussels', 'America/Los_Angeles', timestamp_ntz'2021-12-06 00:00:00'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: SQL error: ParserError("Expected ), found: '2021-12-06 00:00:00'")\n | +|corr |SELECT corr(c1, c2) FROM VALUES (3, 2), (3, 3), (6, 4) as tab(c1, c2); |FAILED 
|Unsupported: Expected only Comet native operators but found Spark fallback | | +|cos |SELECT cos(0); |PASSED |OK |OK | +|cosh |SELECT cosh(0); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback | | +|cot |SELECT cot(1); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback | | +|count |SELECT count(*) FROM VALUES (NULL), (5), (5), (20) AS tab(col); |PASSED |OK |OK | +|count_if |SELECT count_if(col % 2 = 0) FROM VALUES (NULL), (0), (1), (2), (3) AS tab(col); |PASSED |OK |std_out: DataFusion CLI v38.0.0\nError: Error during planning: Invalid function 'count_if'.\nDid you mean 'COUNT'?\n | +|count_min_sketch |SELECT hex(count_min_sketch(col, 0.5d, 0.5d, 1)) FROM VALUES (1), (2), (1) AS tab(col); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_out: DataFusion CLI v38.0.0\nError: SQL error: ParserError("Expected ), found: d")\n | +|covar_pop |SELECT covar_pop(c1, c2) FROM VALUES (1,1), (2,2), (3,3) AS tab(c1, c2); |PASSED |OK |OK | +|covar_samp |SELECT covar_samp(c1, c2) FROM VALUES (1,1), (2,2), (3,3) AS tab(c1, c2); |PASSED |OK |OK | +|crc32 |SELECT crc32('Spark'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'crc32'.\nDid you mean 'nvl2'?\n | +|csc |SELECT csc(1); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'csc'.\nDid you mean 'cot'?\n | +|cume_dist |SELECT a, b, cume_dist() OVER (PARTITION BY a ORDER BY b) FROM VALUES ('A1', 2), ('A1', 1), ('A2', 3), ('A1', 1) tab(a, b); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback | | +|curdate |SELECT curdate(); |PASSED |OK |std_err: Error during planning: Invalid function 'curdate'.\nDid you mean 'to_date'?\n | +|current_catalog |SELECT current_catalog(); |PASSED |OK |std_err: SQL error: ParserError("Expected 
end of statement, found: (")\n | +|current_database |SELECT current_database(); |PASSED |OK |std_err: Error during planning: Invalid function 'current_database'.\nDid you mean 'current_date'?\n | +|current_date |SELECT current_date(); |PASSED |OK |OK | +|current_schema |SELECT current_schema(); |PASSED |OK |std_err: Error during planning: Invalid function 'current_schema'.\nDid you mean 'current_time'?\n | +|current_timestamp |SELECT current_timestamp(); |FAILED |Unsupported: Results do not match for query |std_err: Error during planning: Invalid function 'current_timestamp'.\nDid you mean 'current_time'?\n | +|current_timezone |SELECT current_timezone(); |PASSED |OK |std_err: Error during planning: Invalid function 'current_timezone'.\nDid you mean 'current_time'?\n | +|current_user |SELECT current_user(); |PASSED |OK |std_err: SQL error: ParserError("Expected end of statement, found: (")\n | +|date | |SKIPPED|No examples found in spark.sessionState.functionRegistry | | +|date_add |SELECT date_add('2016-07-30', 1); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'date_add'.\nDid you mean 'date_bin'?\n | +|date_diff |SELECT date_diff('2009-07-31', '2009-07-30'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'date_diff'.\nDid you mean 'date_bin'?\n | +|date_format |SELECT date_format('2016-04-08', 'y'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback | | +|date_from_unix_date |SELECT date_from_unix_date(1); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'date_from_unix_date'.\nDid you mean 'from_unixtime'?\n | +|date_part |SELECT date_part('YEAR', TIMESTAMP '2019-08-12 01:00:00.123456'); |PASSED |OK |OK | +|date_sub |SELECT date_sub('2016-07-30', 1); |FAILED |Unsupported: 
Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'date_sub'.\nDid you mean 'date_bin'?\n | +|date_trunc |SELECT date_trunc('YEAR', '2015-03-05T09:32:05.359'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback | | +|dateadd |SELECT dateadd('2016-07-30', 1); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'dateadd'.\nDid you mean 'datepart'?\n | +|datediff |SELECT datediff('2009-07-31', '2009-07-30'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'datediff'.\nDid you mean 'date_bin'?\n | +|datepart |SELECT datepart('YEAR', TIMESTAMP '2019-08-12 01:00:00.123456'); |PASSED |OK |OK | +|day |SELECT day('2009-07-30'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'day'.\nDid you mean 'tan'?\n | +|dayofmonth |SELECT dayofmonth('2009-07-30'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'dayofmonth'.\nDid you mean 'date_part'?\n | +|dayofweek |SELECT dayofweek('2009-07-30'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'dayofweek'.\nDid you mean 'lower'?\n | +|dayofyear |SELECT dayofyear('2016-04-09'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'dayofyear'.\nDid you mean 'to_char'?\n | +|decimal | |SKIPPED|No examples found in spark.sessionState.functionRegistry | | +|decode |SELECT decode(encode('abc', 'utf-8'), 'utf-8'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: There is no built-in 
encoding named 'utf-8', currently supported encodings are: base64, hex\n | +|degrees |SELECT degrees(3.141592653589793); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback | | +|dense_rank |SELECT a, b, dense_rank(b) OVER (PARTITION BY a ORDER BY b) FROM VALUES ('A1', 2), ('A1', 1), ('A2', 3), ('A1', 1) tab(a, b); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_out: DataFusion CLI v38.0.0\nError: Error during planning: No function matches the given name and argument types 'DENSE_RANK(Int32)'. You might need to add explicit type casts.\n\tCandidate functions:\n\tDENSE_RANK()\n | +|div |SELECT 3 div 2; |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: SQL error: ParserError("No infix parser for token Word(Word { value: \"div\", quote_style: None, keyword: DIV })")\n | +|double | |SKIPPED|No examples found in spark.sessionState.functionRegistry | | +|e |SELECT e(); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'e'.\nDid you mean 'ln'?\n | +|element_at |SELECT element_at(array(1, 2, 3), 2); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: SQL error: ParserError("Expected SELECT, VALUES, or a subquery in the query body, found: 1")\n | +|elt |SELECT elt(1, 'scala', 'java'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'elt'.\nDid you mean 'exp'?\n | +|encode |SELECT encode('abc', 'utf-8'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: There is no built-in encoding named 'utf-8', currently supported encodings are: base64, hex\n | +|endswith |SELECT endswith('Spark SQL', 'SQL'); |PASSED |OK |std_err: Error during planning: Invalid function 'endswith'.\nDid you mean 
'ends_with'?\n | +|equal_null |SELECT equal_null(3, 3); |PASSED |OK |std_err: Error during planning: Invalid function 'equal_null'.\nDid you mean 'ifnull'?\n | +|every |SELECT every(col) FROM VALUES (true), (true), (true) AS tab(col); |PASSED |OK |std_out: DataFusion CLI v38.0.0\nError: Error during planning: Invalid function 'every'.\nDid you mean 'overlay'?\n | +|exists |SELECT exists(array(1, 2, 3), x -> x % 2 == 0); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: SQL error: ParserError("Expected SELECT, VALUES, or a subquery in the query body, found: array")\n | +|exp |SELECT exp(0); |PASSED |OK |OK | +|explode |SELECT explode(array(10, 20)); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: SQL error: ParserError("Expected SELECT, VALUES, or a subquery in the query body, found: 10")\n | +|explode_outer |SELECT explode_outer(array(10, 20)); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: SQL error: ParserError("Expected SELECT, VALUES, or a subquery in the query body, found: 10")\n | +|expm1 |SELECT expm1(0); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'expm1'.\nDid you mean 'exp'?\n | +|extract |SELECT extract(YEAR FROM TIMESTAMP '2019-08-12 01:00:00.123456'); |PASSED |OK |OK | +|factorial |SELECT factorial(5); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback | | +|filter |SELECT filter(array(1, 2, 3), x -> x % 2 == 1); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: SQL error: ParserError("Expected SELECT, VALUES, or a subquery in the query body, found: 1")\n | +|find_in_set |SELECT find_in_set('ab','abc,b,ab,c,def'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback | | +|first |SELECT first(col) FROM VALUES (10), (5), (20) 
AS tab(col); |PASSED |OK |std_out: DataFusion CLI v38.0.0\nError: Error during planning: Invalid function 'first'.\nDid you mean 'sqrt'?\n | +|first_value |SELECT first_value(col) FROM VALUES (10), (5), (20) AS tab(col); |PASSED |OK |OK | +|flatten |SELECT flatten(array(array(1, 2), array(3, 4))); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: SQL error: ParserError("Expected SELECT, VALUES, or a subquery in the query body, found: array")\n | +|float | |SKIPPED|No examples found in spark.sessionState.functionRegistry | | +|floor |SELECT floor(-0.1); |PASSED |OK |OK | +|forall |SELECT forall(array(1, 2, 3), x -> x % 2 == 0); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: SQL error: ParserError("Expected SELECT, VALUES, or a subquery in the query body, found: 1")\n | +|format_number |SELECT format_number(12332.123456, 4); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'format_number'.\nDid you mean 'FIRST_VALUE'?\n | +|format_string |SELECT format_string("Hello World %d %s", 100, "days"); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'format_string'.\nDid you mean 'array_to_string'?\n | +|from_csv |SELECT from_csv('1, 0.8', 'a INT, b DOUBLE'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'from_csv'.\nDid you mean 'arrow_cast'?\n | +|from_json |SELECT from_json('{"a":1, "b":0.8}', 'a INT, b DOUBLE'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'from_json'.\nDid you mean 'floor'?\n | +|from_unixtime |SELECT from_unixtime(0, 'yyyy-MM-dd HH:mm:ss'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback 
|std_err: Error during planning: Error during planning: [data_types_with_scalar_udf] Coercion from [Int64, Utf8] to the signature Uniform(1, [Int64]) failed. and No function matches the given name and argument types 'from_unixtime(Int64, Utf8)'. You might need to add explicit type casts.\n\tCandidate functions:\n\tfrom_unixtime(Int64)\n | +|from_utc_timestamp |SELECT from_utc_timestamp('2016-08-31', 'Asia/Seoul'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'from_utc_timestamp'.\nDid you mean 'to_timestamp'?\n | +|get |SELECT get(array(1, 2, 3), 0); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: SQL error: ParserError("Expected SELECT, VALUES, or a subquery in the query body, found: 1")\n | +|get_json_object |SELECT get_json_object('{"a":"b"}', '$.a'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'get_json_object'.\nDid you mean 'list_intersect'?\n | +|getbit |SELECT getbit(11, 0); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'getbit'.\nDid you mean 'ceil'?\n | +|greatest |SELECT greatest(10, 9, 2, 4, 3); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'greatest'.\nDid you mean 'repeat'?\n | +|grouping |SELECT name, grouping(name), sum(age) FROM VALUES (2, 'Alice'), (5, 'Bob') people(age, name) GROUP BY cube(name); |FAILED |[MISSING_AGGREGATION] The non-aggregating expression "age" is based on columns which are not participating in the GROUP BY clause.\nAdd the columns or the expression to the GROUP BY, aggregate the expression, or use "any_value(age)" if you do not care which of the values within a group is returned.;\nAggregate [name#7445, spark_grouping_id#7444L], 
[age#7441, name#7445]\n+- Expand [[age#7441, name#7442, name#7443, 0], [age#7441, name#7442, null, 1]], [age#7441, name#7442, name#7445, spark_grouping_id#7444L]\n +- Project [age#7441, name#7442, name#7442 AS name#7443]\n +- SubqueryAlias people\n +- LocalRelation [age#7441, name#7442]\n | | +|grouping_id |SELECT name, grouping_id(), sum(age), avg(height) FROM VALUES (2, 'Alice', 165), (5, 'Bob', 180) people(age, name, height) GROUP BY cube(name, height);|FAILED |[MISSING_AGGREGATION] The non-aggregating expression "age" is based on columns which are not participating in the GROUP BY clause.\nAdd the columns or the expression to the GROUP BY, aggregate the expression, or use "any_value(age)" if you do not care which of the values within a group is returned.;\nAggregate [name#8183, height#8184, spark_grouping_id#8182L], [age#8177, name#8183, height#8184]\n+- Expand [[age#8177, name#8178, height#8179, name#8180, height#8181, 0], [age#8177, name#8178, height#8179, name#8180, null, 1], [age#8177, name#8178, height#8179, null, height#8181, 2], [age#8177, name#8178, height#8179, null, null, 3]], [age#8177, name#8178, height#8179, name#8183, height#8184, spark_grouping_id#8182L]\n +- Project [age#8177, name#8178, height#8179, name#8178 AS name#8180, height#8179 AS height#8181]\n +- SubqueryAlias people\n +- LocalRelation [age#8177, name#8178, height#8179]\n| | +|hash |SELECT hash('Spark', array(123), 2); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: SQL error: ParserError("Expected SELECT, VALUES, or a subquery in the query body, found: 123")\n | +|hex |SELECT hex(17); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'hex'.\nDid you mean 'exp'?\n | +|histogram_numeric |SELECT histogram_numeric(col, 5) FROM VALUES (0), (1), (2), (10) AS tab(col); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_out: 
DataFusion CLI v38.0.0\nError: Error during planning: Invalid function 'histogram_numeric'.\nDid you mean 'list_reverse'?\n | +|hour |SELECT hour('2009-07-30 12:58:59'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'hour'.\nDid you mean 'CORR'?\n | +|hypot |SELECT hypot(3, 4); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'hypot'.\nDid you mean 'pow'?\n | +|if |SELECT if(1 < 2, 'a', 'b'); |PASSED |OK |std_err: Error during planning: Invalid function 'if'.\nDid you mean 'sin'?\n | +|ifnull |SELECT ifnull(NULL, array('2')); |PASSED |OK |std_err: SQL error: ParserError("Expected SELECT, VALUES, or a subquery in the query body, found: '2'")\n | +|ilike |SELECT ilike('Spark', '_Park'); |PASSED |OK |std_err: Error during planning: Invalid function 'ilike'.\nDid you mean 'ceil'?\n | +|in |SELECT 1 in(1, 2, 3); |PASSED |OK |OK | +|initcap |SELECT initcap('sPark sql'); |PASSED |OK |OK | +|inline |SELECT inline(array(struct(1, 'a'), struct(2, 'b'))); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: SQL error: ParserError("Expected SELECT, VALUES, or a subquery in the query body, found: struct")\n | +|inline_outer |SELECT inline_outer(array(struct(1, 'a'), struct(2, 'b'))); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: SQL error: ParserError("Expected SELECT, VALUES, or a subquery in the query body, found: struct")\n | +|input_file_block_length |SELECT input_file_block_length(); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'input_file_block_length'.\nDid you mean 'octet_length'?\n | +|input_file_block_start |SELECT input_file_block_start(); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback 
|std_err: Error during planning: Invalid function 'input_file_block_start'.\nDid you mean 'list_replace_all'?\n | +|input_file_name |SELECT input_file_name(); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'input_file_name'.\nDid you mean 'to_timestamp'?\n | +|instr |SELECT instr('SparkSQL', 'SQL'); |PASSED |OK |OK | +|int | |SKIPPED|No examples found in spark.sessionState.functionRegistry | | +|isnan |SELECT isnan(cast('NaN' as double)); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback | | +|isnotnull |SELECT isnotnull(1); |PASSED |OK |std_err: Error during planning: Invalid function 'isnotnull'.\nDid you mean 'ifnull'?\n | +|isnull |SELECT isnull(1); |PASSED |OK |std_err: Error during planning: Invalid function 'isnull'.\nDid you mean 'ifnull'?\n | +|json_array_length |SELECT json_array_length('[1,2,3,4]'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'json_array_length'.\nDid you mean 'array_length'?\n | +|json_object_keys |SELECT json_object_keys('{}'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'json_object_keys'.\nDid you mean 'concat_ws'?\n | +|json_tuple |SELECT json_tuple('{"a":1, "b":2}', 'a', 'b'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'json_tuple'.\nDid you mean 'ifnull'?\n | +|kurtosis |SELECT kurtosis(col) FROM VALUES (-10), (-20), (100), (1000) AS tab(col); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_out: DataFusion CLI v38.0.0\nError: Error during planning: Invalid function 'kurtosis'.\nDid you mean 'rtrim'?\n | +|lag |SELECT a, b, lag(b) OVER (PARTITION BY a ORDER BY b) FROM VALUES ('A1', 2), ('A1', 1), ('A2', 3), 
('A1', 1) tab(a, b); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback | | +|last |SELECT last(col) FROM VALUES (10), (5), (20) AS tab(col); |PASSED |OK |std_out: DataFusion CLI v38.0.0\nError: Error during planning: Invalid function 'last'.\nDid you mean 'left'?\n | +|last_day |SELECT last_day('2009-01-12'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'last_day'.\nDid you mean 'list_cat'?\n | +|last_value |SELECT last_value(col) FROM VALUES (10), (5), (20) AS tab(col); |PASSED |OK |OK | +|lcase |SELECT lcase('SparkSql'); |PASSED |OK |std_err: Error during planning: Invalid function 'lcase'.\nDid you mean 'cos'?\n | +|lead |SELECT a, b, lead(b) OVER (PARTITION BY a ORDER BY b) FROM VALUES ('A1', 2), ('A1', 1), ('A2', 3), ('A1', 1) tab(a, b); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback | | +|least |SELECT least(10, 9, 2, 4, 3); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'least'.\nDid you mean 'left'?\n | +|left |SELECT left('Spark SQL', 3); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback | | +|len |SELECT len('Spark SQL '); |PASSED |OK |std_err: Error during planning: Invalid function 'len'.\nDid you mean 'ln'?\n | +|length |SELECT length('Spark SQL '); |PASSED |OK |OK | +|levenshtein |SELECT levenshtein('kitten', 'sitting'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback | | +|like |SELECT like('Spark', '_park'); |PASSED |OK |std_err: Error during planning: Invalid function 'like'.\nDid you mean 'lpad'?\n | +|ln |SELECT ln(1); |PASSED |OK |OK | +|localtimestamp |SELECT localtimestamp(); |FAILED |Failed on native side: found CometNativeException |std_err: Error during planning: Invalid function 'localtimestamp'.\nDid you mean 'to_timestamp'?\n 
| +|locate |SELECT locate('bar', 'foobarbar'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'locate'.\nDid you mean 'concat'?\n | +|log |SELECT log(10, 100); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback | | +|log10 |SELECT log10(10); |PASSED |OK |OK | +|log1p |SELECT log1p(0); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'log1p'.\nDid you mean 'log10'?\n | +|log2 |SELECT log2(2); |PASSED |OK |OK | +|lower |SELECT lower('SparkSql'); |PASSED |OK |OK | +|lpad |SELECT lpad('hi', 5, '??'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback | | +|ltrim |SELECT ltrim(' SparkSQL '); |PASSED |OK |OK | +|make_date |SELECT make_date(2013, 7, 15); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback | | +|make_dt_interval |SELECT make_dt_interval(1, 12, 30, 01.001001); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'make_dt_interval'.\nDid you mean 'make_date'?\n | +|make_interval |SELECT make_interval(100, 11, 1, 1, 12, 30, 01.001001); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'make_interval'.\nDid you mean 'make_date'?\n | +|make_timestamp |SELECT make_timestamp(2014, 12, 28, 6, 30, 45.887); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'make_timestamp'.\nDid you mean 'to_timestamp'?\n | +|make_timestamp_ltz |SELECT make_timestamp_ltz(2014, 12, 28, 6, 30, 45.887); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'make_timestamp_ltz'.\nDid you mean 
'to_timestamp'?\n | +|make_timestamp_ntz |SELECT make_timestamp_ntz(2014, 12, 28, 6, 30, 45.887); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'make_timestamp_ntz'.\nDid you mean 'to_timestamp'?\n | +|make_ym_interval |SELECT make_ym_interval(1, 2); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'make_ym_interval'.\nDid you mean 'make_date'?\n | +|map |SELECT map(1.0, '2', 3.0, '4'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'map'.\nDid you mean 'MAX'?\n | +|map_concat |SELECT map_concat(map(1, 'a', 2, 'b'), map(3, 'c')); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'map_concat'.\nDid you mean 'array_concat'?\n | +|map_contains_key |SELECT map_contains_key(map(1, 'a', 2, 'b'), 1); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'map_contains_key'.\nDid you mean 'array_contains'?\n | +|map_entries |SELECT map_entries(map(1, 'a', 2, 'b')); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'map_entries'.\nDid you mean 'make_list'?\n | +|map_filter |SELECT map_filter(map(1, 0, 2, 2, 3, -1), (k, v) -> k > v); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'map_filter'.\nDid you mean 'make_date'?\n | +|map_from_arrays |SELECT map_from_arrays(array(1.0, 3.0), array('2', '4')); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: SQL error: ParserError("Expected SELECT, VALUES, or a subquery in the query body, found: 1.0")\n | 
+|map_from_entries |SELECT map_from_entries(array(struct(1, 'a'), struct(2, 'b'))); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: SQL error: ParserError("Expected SELECT, VALUES, or a subquery in the query body, found: struct")\n | +|map_keys |SELECT map_keys(map(1, 'a', 2, 'b')); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'map_keys'.\nDid you mean 'to_hex'?\n | +|map_values |SELECT map_values(map(1, 'a', 2, 'b')); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'map_values'.\nDid you mean 'LAST_VALUE'?\n | +|map_zip_with |SELECT map_zip_with(map(1, 'a', 2, 'b'), map(1, 'x', 2, 'y'), (k, v1, v2) -> concat(v1, v2)); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'map_zip_with'.\nDid you mean 'starts_with'?\n | +|mask |SELECT mask('abcd-EFGH-8765-4321'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'mask'.\nDid you mean 'MAX'?\n | +|max |SELECT max(col) FROM VALUES (10), (50), (20) AS tab(col); |PASSED |OK |OK | +|max_by |SELECT max_by(x, y) FROM VALUES (('a', 10)), (('b', 50)), (('c', 20)) AS tab(x, y); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_out: DataFusion CLI v38.0.0\nError: Error during planning: Invalid function 'max_by'.\nDid you mean 'MAX'?\n | +|md5 |SELECT md5('Spark'); |PASSED |OK |OK | +|mean |SELECT mean(col) FROM VALUES (1), (2), (3) AS tab(col); |PASSED |OK |OK | +|median |SELECT median(col) FROM VALUES (0), (10) AS tab(col); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback | | +|min |SELECT min(col) FROM VALUES (10), (-1), (20) AS tab(col); |PASSED |OK |OK | +|min_by |SELECT 
min_by(x, y) FROM VALUES (('a', 10)), (('b', 50)), (('c', 20)) AS tab(x, y); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_out: DataFusion CLI v38.0.0\nError: Error during planning: Invalid function 'min_by'.\nDid you mean 'MIN'?\n | +|minute |SELECT minute('2009-07-30 12:58:59'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'minute'.\nDid you mean 'instr'?\n | +|mod |SELECT 2 % 1.8; |PASSED |OK |OK | +|mode |SELECT mode(col) FROM VALUES (0), (10), (10) AS tab(col); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_out: DataFusion CLI v38.0.0\nError: Error during planning: Invalid function 'mode'.\nDid you mean 'md5'?\n | +|monotonically_increasing_id|SELECT monotonically_increasing_id(); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'monotonically_increasing_id'.\nDid you mean 'array_intersect'?\n | +|month |SELECT month('2016-07-30'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'month'.\nDid you mean 'cosh'?\n | +|months_between |SELECT months_between('1997-02-28 10:30:00', '1996-10-30'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'months_between'.\nDid you mean 'NTH_VALUE'?\n | +|named_struct |SELECT named_struct("a", 1, "b", 2, "c", 3); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Schema error: No field named a.\n | +|nanvl |SELECT nanvl(cast('NaN' as double), 123); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback | | +|negative |SELECT negative(1); |PASSED |OK |std_err: Error during planning: Invalid function 'negative'.\nDid you mean 'nanvl'?\n 
| +|next_day |SELECT next_day('2015-01-14', 'TU'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'next_day'.\nDid you mean 'today'?\n | +|not |SELECT not true; |PASSED |OK |OK | +|now |SELECT now(); |FAILED |Unsupported: Results do not match for query | | +|nth_value |SELECT a, b, nth_value(b, 2) OVER (PARTITION BY a ORDER BY b) FROM VALUES ('A1', 2), ('A1', 1), ('A2', 3), ('A1', 1) tab(a, b); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback | | +|ntile |SELECT a, b, ntile(2) OVER (PARTITION BY a ORDER BY b) FROM VALUES ('A1', 2), ('A1', 1), ('A2', 3), ('A1', 1) tab(a, b); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback | | +|nullif |SELECT nullif(2, 2); |PASSED |OK |OK | +|nvl |SELECT nvl(NULL, array('2')); |PASSED |OK |std_err: SQL error: ParserError("Expected SELECT, VALUES, or a subquery in the query body, found: '2'")\n | +|nvl2 |SELECT nvl2(NULL, 2, 1); |PASSED |OK |OK | +|octet_length |SELECT octet_length('Spark SQL'); |PASSED |OK |OK | +|or |SELECT true or false; |PASSED |OK |OK | +|overlay |SELECT overlay('Spark SQL' PLACING '_' FROM 6); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback | | +|parse_url |SELECT parse_url('http://spark.apache.org/path?query=1', 'HOST'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'parse_url'.\nDid you mean 'power'?\n | +|percent_rank |SELECT a, b, percent_rank(b) OVER (PARTITION BY a ORDER BY b) FROM VALUES ('A1', 2), ('A1', 1), ('A2', 3), ('A1', 1) tab(a, b); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_out: DataFusion CLI v38.0.0\nError: Error during planning: No function matches the given name and argument types 'PERCENT_RANK(Int32)'. 
You might need to add explicit type casts.\n\tCandidate functions:\n\tPERCENT_RANK()\n | +|percentile |SELECT percentile(col, 0.3) FROM VALUES (0), (10) AS tab(col); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_out: DataFusion CLI v38.0.0\nError: Error during planning: Invalid function 'percentile'.\nDid you mean 'ceil'?\n | +|percentile_approx |SELECT percentile_approx(col, array(0.5, 0.4, 0.1), 100) FROM VALUES (0), (1), (2), (10) AS tab(col); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_out: DataFusion CLI v38.0.0\nError: SQL error: ParserError("Expected SELECT, VALUES, or a subquery in the query body, found: 0.5")\n | +|pi |SELECT pi(); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback | | +|pmod |SELECT pmod(10, 3); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'pmod'.\nDid you mean 'pow'?\n | +|posexplode |SELECT posexplode(array(10,20)); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: SQL error: ParserError("Expected SELECT, VALUES, or a subquery in the query body, found: 10")\n | +|posexplode_outer |SELECT posexplode_outer(array(10,20)); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: SQL error: ParserError("Expected SELECT, VALUES, or a subquery in the query body, found: 10")\n | +|position |SELECT position('bar', 'foobarbar'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: SQL error: ParserError("Position function must include IN keyword")\n | +|positive |SELECT positive(1); |PASSED |OK |std_err: Error during planning: Invalid function 'positive'.\nDid you mean 'position'?\n | +|pow |SELECT pow(2, 3); |PASSED |OK |OK | +|power |SELECT power(2, 3); |PASSED |OK |OK | +|printf |SELECT printf("Hello World %d %s", 
100, "days"); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'printf'.\nDid you mean 'instr'?\n | +|quarter |SELECT quarter('2016-08-31'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'quarter'.\nDid you mean 'flatten'?\n | +|radians |SELECT radians(180); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback | | +|raise_error |SELECT raise_error('custom error message'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'raise_error'.\nDid you mean 'make_array'?\n | +|rand |SELECT rand(); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'rand'.\nDid you mean 'rpad'?\n | +|randn |SELECT randn(); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'randn'.\nDid you mean 'range'?\n | +|random |SELECT random(); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback | | +|rank |SELECT a, b, rank(b) OVER (PARTITION BY a ORDER BY b) FROM VALUES ('A1', 2), ('A1', 1), ('A2', 3), ('A1', 1) tab(a, b); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_out: DataFusion CLI v38.0.0\nError: Error during planning: No function matches the given name and argument types 'RANK(Int32)'. 
You might need to add explicit type casts.\n\tCandidate functions:\n\tRANK()\n | +|reduce |SELECT reduce(array(1, 2, 3), 0, (acc, x) -> acc + x); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: SQL error: ParserError("Expected SELECT, VALUES, or a subquery in the query body, found: 1")\n | +|regexp |SELECT regexp('%SystemDrive%\Users\John', '%SystemDrive%\\Users.*'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: SQL error: TokenizerError("unsupported escape char: '\\U'")\n | +|regexp_count |SELECT regexp_count('Steven Jones and Stephen Smith are the best players', 'Ste(v|ph)en'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'regexp_count'.\nDid you mean 'REGR_COUNT'?\n | +|regexp_extract |SELECT regexp_extract('100-200', '(\\d+)-(\\d+)', 1); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'regexp_extract'.\nDid you mean 'regexp_match'?\n | +|regexp_extract_all |SELECT regexp_extract_all('100-200, 300-400', '(\\d+)-(\\d+)', 1); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'regexp_extract_all'.\nDid you mean 'regexp_replace'?\n | +|regexp_instr |SELECT regexp_instr('user@spark.apache.org', '@[^.]*'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'regexp_instr'.\nDid you mean 'regexp_like'?\n | +|regexp_like |SELECT regexp_like('%SystemDrive%\Users\John', '%SystemDrive%\\Users.*'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: SQL error: TokenizerError("unsupported escape char: '\\U'")\n | +|regexp_replace |SELECT regexp_replace('100-200', '(\\d+)', 'num'); |FAILED |Unsupported: 
Expected only Comet native operators but found Spark fallback | | +|regexp_substr |SELECT regexp_substr('Steven Jones and Stephen Smith are the best players', 'Ste(v|ph)en'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'regexp_substr'.\nDid you mean 'regexp_match'?\n | +|regr_avgx |SELECT regr_avgx(y, x) FROM VALUES (1, 2), (2, 2), (2, 3), (2, 4) AS tab(y, x); |PASSED |OK |OK | +|regr_avgy |SELECT regr_avgy(y, x) FROM VALUES (1, 2), (2, 2), (2, 3), (2, 4) AS tab(y, x); |PASSED |OK |OK | +|regr_count |SELECT regr_count(y, x) FROM VALUES (1, 2), (2, 2), (2, 3), (2, 4) AS tab(y, x); |PASSED |OK |OK | +|regr_intercept |SELECT regr_intercept(y, x) FROM VALUES (1,1), (2,2), (3,3) AS tab(y, x); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback | | +|regr_r2 |SELECT regr_r2(y, x) FROM VALUES (1, 2), (2, 2), (2, 3), (2, 4) AS tab(y, x); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback | | +|regr_slope |SELECT regr_slope(y, x) FROM VALUES (1,1), (2,2), (3,3) AS tab(y, x); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback | | +|regr_sxx |SELECT regr_sxx(y, x) FROM VALUES (1, 2), (2, 2), (2, 3), (2, 4) AS tab(y, x); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback | | +|regr_sxy |SELECT regr_sxy(y, x) FROM VALUES (1, 2), (2, 2), (2, 3), (2, 4) AS tab(y, x); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback | | +|regr_syy |SELECT regr_syy(y, x) FROM VALUES (1, 2), (2, 2), (2, 3), (2, 4) AS tab(y, x); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback | | +|repeat |SELECT repeat('123', 2); |PASSED |OK |OK | +|replace |SELECT replace('ABCabc', 'abc', 'DEF'); |PASSED |OK |OK | +|reverse |SELECT reverse('Spark SQL'); |PASSED |OK |OK | +|right |SELECT right('Spark SQL', 3); |FAILED 
|Unsupported: Expected only Comet native operators but found Spark fallback | | +|rint |SELECT rint(12.3456); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'rint'.\nDid you mean 'sin'?\n | +|rlike |SELECT rlike('%SystemDrive%\Users\John', '%SystemDrive%\\Users.*'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: SQL error: TokenizerError("unsupported escape char: '\\U'")\n | +|round |SELECT round(2.5, 0); |PASSED |OK |OK | +|row_number |SELECT a, b, row_number() OVER (PARTITION BY a ORDER BY b) FROM VALUES ('A1', 2), ('A1', 1), ('A2', 3), ('A1', 1) tab(a, b); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback | | +|rpad |SELECT rpad('hi', 5, '??'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback | | +|rtrim |SELECT rtrim(' SparkSQL '); |PASSED |OK |OK | +|schema_of_csv |SELECT schema_of_csv('1,abc'); |FAILED |[DATATYPE_MISMATCH.NON_FOLDABLE_INPUT] Cannot resolve "schema_of_csv(c0)" due to data type mismatch: the input csv should be a foldable "STRING" expression; however, got "c0".; line 1 pos 7;\n'Project [unresolvedalias(schema_of_csv(c0#6515), None)]\n+- SubqueryAlias tbl\n +- View (`tbl`, [c0#6515])\n +- Relation [c0#6515] parquet\n |std_err: Error during planning: Invalid function 'schema_of_csv'.\nDid you mean 'concat_ws'?\n | +|schema_of_json |SELECT schema_of_json('[{"col":0}]'); |FAILED |[DATATYPE_MISMATCH.NON_FOLDABLE_INPUT] Cannot resolve "schema_of_json(c0)" due to data type mismatch: the input json should be a foldable "STRING" expression; however, got "c0".; line 1 pos 7;\n'Project [unresolvedalias(schema_of_json(c0#7685), None)]\n+- SubqueryAlias tbl\n +- View (`tbl`, [c0#7685])\n +- Relation [c0#7685] parquet\n |std_err: Error during planning: Invalid function 'schema_of_json'.\nDid you mean 'concat_ws'?\n | +|sec |SELECT sec(0); |FAILED 
|Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'sec'.\nDid you mean 'sin'?\n | +|second |SELECT second('2009-07-30 12:58:59'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'second'.\nDid you mean 'decode'?\n | +|sentences |SELECT sentences('Hi there! Good morning.'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'sentences'.\nDid you mean 'degrees'?\n | +|sequence |SELECT sequence(1, 5); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'sequence'.\nDid you mean 'replace'?\n | +|sha |SELECT sha('Spark'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'sha'.\nDid you mean 'chr'?\n | +|sha1 |SELECT sha1('Spark'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'sha1'.\nDid you mean 'sha512'?\n | +|sha2 |SELECT sha2('Spark', 256); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'sha2'.\nDid you mean 'sha224'?\n | +|shiftleft |SELECT shiftleft(2, 1); |PASSED |OK |std_err: Error during planning: Invalid function 'shiftleft'.\nDid you mean 'left'?\n | +|shiftright |SELECT shiftright(4, 1); |PASSED |OK |std_err: Error during planning: Invalid function 'shiftright'.\nDid you mean 'right'?\n | +|shiftrightunsigned |SELECT shiftrightunsigned(4, 1); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'shiftrightunsigned'.\nDid you mean 'list_union'?\n | +|shuffle |SELECT shuffle(array(1, 20, 3, 5)); |FAILED 
|Unsupported: Expected only Comet native operators but found Spark fallback |std_err: SQL error: ParserError("Expected SELECT, VALUES, or a subquery in the query body, found: 1")\n | +|sign |SELECT sign(40); |PASSED |OK |std_err: Error during planning: Invalid function 'sign'.\nDid you mean 'sin'?\n | +|signum |SELECT signum(40); |PASSED |OK |OK | +|sin |SELECT sin(0); |PASSED |OK |OK | +|sinh |SELECT sinh(0); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback | | +|size |SELECT size(array('b', 'd', 'c', 'a')); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: SQL error: ParserError("Expected SELECT, VALUES, or a subquery in the query body, found: 'b'")\n | +|skewness |SELECT skewness(col) FROM VALUES (-10), (-20), (100), (1000) AS tab(col); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_out: DataFusion CLI v38.0.0\nError: Error during planning: Invalid function 'skewness'.\nDid you mean 'degrees'?\n | +|slice |SELECT slice(array(1, 2, 3, 4), 2, 2); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: SQL error: ParserError("Expected SELECT, VALUES, or a subquery in the query body, found: 1")\n | +|smallint | |SKIPPED|No examples found in spark.sessionState.functionRegistry | | +|some |SELECT some(col) FROM VALUES (true), (false), (false) AS tab(col); |PASSED |OK |std_out: DataFusion CLI v38.0.0\nError: Error during planning: Invalid function 'some'.\nDid you mean 'SUM'?\n | +|sort_array |SELECT sort_array(array('b', 'd', null, 'c', 'a'), true); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: SQL error: ParserError("Expected SELECT, VALUES, or a subquery in the query body, found: 'b'")\n | +|soundex |SELECT soundex('Miller'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 
'soundex'.\nDid you mean 'round'?\n | +|space |SELECT concat(space(2), '1'); |PASSED |OK |std_err: Error during planning: Invalid function 'space'.\nDid you mean 'rpad'?\n | +|spark_partition_id |SELECT spark_partition_id(); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'spark_partition_id'.\nDid you mean 'array_position'?\n | +|split |SELECT split('oneAtwoBthreeC', '[ABC]'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'split'.\nDid you mean 'sqrt'?\n | +|split_part |SELECT split_part('11.12.13', '.', 3); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback | | +|sqrt |SELECT sqrt(4); |PASSED |OK |OK | +|stack |SELECT stack(2, 1, 2, 3); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'stack'.\nDid you mean 'atan'?\n | +|startswith |SELECT startswith('Spark SQL', 'Spark'); |PASSED |OK |std_err: Error during planning: Invalid function 'startswith'.\nDid you mean 'starts_with'?\n | +|std |SELECT std(col) FROM VALUES (1), (2), (3) AS tab(col); |PASSED |OK |std_out: DataFusion CLI v38.0.0\nError: Error during planning: Invalid function 'std'.\nDid you mean 'sin'?\n | +|stddev |SELECT stddev(col) FROM VALUES (1), (2), (3) AS tab(col); |PASSED |OK |OK | +|stddev_pop |SELECT stddev_pop(col) FROM VALUES (1), (2), (3) AS tab(col); |PASSED |OK |OK | +|stddev_samp |SELECT stddev_samp(col) FROM VALUES (1), (2), (3) AS tab(col); |PASSED |OK |OK | +|str_to_map |SELECT str_to_map('a:1,b:2,c:3', ',', ':'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'str_to_map'.\nDid you mean 'date_format'?\n | +|string | |SKIPPED|No examples found in spark.sessionState.functionRegistry | | +|struct |SELECT struct(1, 2, 
3); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback | | +|substr |SELECT substr('Spark SQL', 5); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback | | +|substring |SELECT substring('Spark SQL', 5); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback | | +|substring_index |SELECT substring_index('www.apache.org', '.', 2); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback | | +|sum |SELECT sum(col) FROM VALUES (5), (10), (15) AS tab(col); |PASSED |OK |OK | +|tan |SELECT tan(0); |PASSED |OK |OK | +|tanh |SELECT tanh(0); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback | | +|timestamp | |SKIPPED|No examples found in spark.sessionState.functionRegistry | | +|timestamp_micros |SELECT timestamp_micros(1230219000123123); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'timestamp_micros'.\nDid you mean 'to_timestamp_micros'?\n | +|timestamp_millis |SELECT timestamp_millis(1230219000123); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'timestamp_millis'.\nDid you mean 'to_timestamp_millis'?\n | +|timestamp_seconds |SELECT timestamp_seconds(1230219000); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'timestamp_seconds'.\nDid you mean 'to_timestamp_seconds'?\n | +|tinyint | |SKIPPED|No examples found in spark.sessionState.functionRegistry | | +|to_binary |SELECT to_binary('abc', 'utf-8'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'to_binary'.\nDid you mean 'to_char'?\n | +|to_char |SELECT to_char(454, '999'); |FAILED |Unsupported: Expected only Comet native 
operators but found Spark fallback |std_err: Error during planning: Error during planning: [data_types_with_scalar_udf] Coercion from [Int64, Utf8] to the signature OneOf([Exact([Date32, Utf8]), Exact([Date64, Utf8]), Exact([Time32(Millisecond), Utf8]), Exact([Time32(Second), Utf8]), Exact([Time64(Microsecond), Utf8]), Exact([Time64(Nanosecond), Utf8]), Exact([Timestamp(Second, None), Utf8]), Exact([Timestamp(Second, Some("+TZ")), Utf8]), Exact([Timestamp(Millisecond, None), Utf8]), Exact([Timestamp(Millisecond, Some("+TZ")), Utf8]), Exact([Timestamp(Microsecond, None), Utf8]), Exact([Timestamp(Microsecond, Some("+TZ")), Utf8]), Exact([Timestamp(Nanosecond, None), Utf8]), Exact([Timestamp(Nanosecond, Some("+TZ")), Utf8]), Exact([Duration(Second), Utf8]), Exact([Duration(Millisecond), Utf8]), Exact([Duration(Microsecond), Utf8]), Exact([Duration(Nanosecond), Utf8])]) failed. and No function matches the given name and argument types 'to_char(Int64, Utf8)'. You might need to add explicit type casts.\n\tCandidate functions:\n\tto_char(Date32, Utf8)\n\tto_char(Date64, Utf8)\n\tto_char(Time32(Millisecond), Utf8)\n\tto_char(Time32(Second), Utf8)\n\tto_char(Time64(Microsecond), Utf8)\n\tto_char(Time64(Nanosecond), Utf8)\n\tto_char(Timestamp(Second, None), Utf8)\n\tto_char(Timestamp(Second, Some("+TZ")), Utf8)\n\tto_char(Timestamp(Millisecond, None), Utf8)\n\tto_char(Timestamp(Millisecond, Some("+TZ")), Utf8)\n\tto_char(Timestamp(Microsecond, None), Utf8)\n\tto_char(Timestamp(Microsecond, Some("+TZ")), Utf8)\n\tto_char(Timestamp(Nanosecond, None), Utf8)\n\tto_char(Timestamp(Nanosecond, Some("+TZ")), Utf8)\n\tto_char(Duration(Second), Utf8)\n\tto_char(Duration(Millisecond), Utf8)\n\tto_char(Duration(Microsecond), Utf8)\n\tto_char(Duration(Nanosecond), Utf8)\n| +|to_csv |SELECT to_csv(named_struct('a', 1, 'b', 2)); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'to_csv'.\nDid you mean 
'to_char'?\n | +|to_date |SELECT to_date('2009-07-30 04:17:52'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback | | +|to_json |SELECT to_json(named_struct('a', 1, 'b', 2)); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'to_json'.\nDid you mean 'to_char'?\n | +|to_number |SELECT to_number('454', '999'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'to_number'.\nDid you mean 'to_date'?\n | +|to_timestamp |SELECT to_timestamp('2016-12-31 00:12:00'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback | | +|to_timestamp_ltz |SELECT to_timestamp_ltz('2016-12-31 00:12:00'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'to_timestamp_ltz'.\nDid you mean 'to_timestamp'?\n | +|to_timestamp_ntz |SELECT to_timestamp_ntz('2016-12-31 00:12:00'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'to_timestamp_ntz'.\nDid you mean 'to_timestamp'?\n | +|to_unix_timestamp |SELECT to_unix_timestamp('2016-04-08', 'yyyy-MM-dd'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'to_unix_timestamp'.\nDid you mean 'to_timestamp'?\n | +|to_utc_timestamp |SELECT to_utc_timestamp('2016-08-31', 'Asia/Seoul'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'to_utc_timestamp'.\nDid you mean 'to_timestamp'?\n | +|transform |SELECT transform(array(1, 2, 3), x -> x + 1); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: SQL error: ParserError("Expected SELECT, VALUES, or a 
subquery in the query body, found: 1")\n | +|transform_keys |SELECT transform_keys(map_from_arrays(array(1, 2, 3), array(1, 2, 3)), (k, v) -> k + 1); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: SQL error: ParserError("Expected SELECT, VALUES, or a subquery in the query body, found: 1")\n | +|transform_values |SELECT transform_values(map_from_arrays(array(1, 2, 3), array(1, 2, 3)), (k, v) -> v + 1); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: SQL error: ParserError("Expected SELECT, VALUES, or a subquery in the query body, found: 1")\n | +|translate |SELECT translate('AaBbCc', 'abc', '123'); |PASSED |OK |OK | +|trim |SELECT trim(' SparkSQL '); |PASSED |OK |OK | +|trunc |SELECT trunc('2019-08-04', 'week'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Error during planning: [data_types_with_scalar_udf] Coercion from [Utf8, Utf8] to the signature OneOf([Exact([Float32, Int64]), Exact([Float64, Int64]), Exact([Float64]), Exact([Float32])]) failed. and No function matches the given name and argument types 'trunc(Utf8, Utf8)'. 
You might need to add explicit type casts.\n\tCandidate functions:\n\ttrunc(Float32, Int64)\n\ttrunc(Float64, Int64)\n\ttrunc(Float64)\n\ttrunc(Float32)\n | +|try_add |SELECT try_add(1, 2); |FAILED |Unsupported: Results do not match for query |std_err: Error during planning: Invalid function 'try_add'.\nDid you mean 'rpad'?\n | +|try_avg |SELECT try_avg(col) FROM VALUES (1), (2), (3) AS tab(col); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_out: DataFusion CLI v38.0.0\nError: Error during planning: Invalid function 'try_avg'.\nDid you mean 'AVG'?\n | +|try_divide |SELECT try_divide(3, 2); |PASSED |OK |std_err: Error during planning: Invalid function 'try_divide'.\nDid you mean 'to_date'?\n | +|try_element_at |SELECT try_element_at(array(1, 2, 3), 2); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: SQL error: ParserError("Expected SELECT, VALUES, or a subquery in the query body, found: 1")\n | +|try_multiply |SELECT try_multiply(2, 3); |FAILED |Unsupported: Results do not match for query |std_err: Error during planning: Invalid function 'try_multiply'.\nDid you mean 'to_date'?\n | +|try_subtract |SELECT try_subtract(2, 1); |PASSED |OK |std_err: Error during planning: Invalid function 'try_subtract'.\nDid you mean 'array_extract'?\n | +|try_sum |SELECT try_sum(col) FROM VALUES (5), (10), (15) AS tab(col); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_out: DataFusion CLI v38.0.0\nError: Error during planning: Invalid function 'try_sum'.\nDid you mean 'trim'?\n | +|try_to_binary |SELECT try_to_binary('abc', 'utf-8'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'try_to_binary'.\nDid you mean 'date_bin'?\n | +|try_to_number |SELECT try_to_number('454', '999'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: 
Error during planning: Invalid function 'try_to_number'.\nDid you mean 'to_char'?\n | +|try_to_timestamp |SELECT try_to_timestamp('2016-12-31 00:12:00'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'try_to_timestamp'.\nDid you mean 'to_timestamp'?\n | +|typeof |SELECT typeof(1); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'typeof'.\nDid you mean 'repeat'?\n | +|ucase |SELECT ucase('SparkSql'); |PASSED |OK |std_err: Error during planning: Invalid function 'ucase'.\nDid you mean 'cos'?\n | +|unbase64 |SELECT unbase64('U3BhcmsgU1FM'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'unbase64'.\nDid you mean 'sha384'?\n | +|unhex |SELECT decode(unhex('537061726B2053514C'), 'UTF-8'); |PASSED |OK |std_err: Error during planning: Invalid function 'unhex'.\nDid you mean 'upper'?\n | +|unix_date |SELECT unix_date(DATE("1970-01-02")); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'unix_date'.\nDid you mean 'to_date'?\n | +|unix_micros |SELECT unix_micros(TIMESTAMP('1970-01-01 00:00:01Z')); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'unix_micros'.\nDid you mean 'acos'?\n | +|unix_millis |SELECT unix_millis(TIMESTAMP('1970-01-01 00:00:01Z')); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'unix_millis'.\nDid you mean 'nullif'?\n | +|unix_seconds |SELECT unix_seconds(TIMESTAMP('1970-01-01 00:00:01Z')); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'unix_seconds'.\nDid 
you mean 'decode'?\n | +|unix_timestamp |SELECT unix_timestamp(); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'unix_timestamp'.\nDid you mean 'to_timestamp'?\n | +|upper |SELECT upper('SparkSql'); |PASSED |OK |OK | +|url_decode |SELECT url_decode('https%3A%2F%2Fspark.apache.org'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'url_decode'.\nDid you mean 'decode'?\n | +|url_encode |SELECT url_encode('https://spark.apache.org'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'url_encode'.\nDid you mean 'encode'?\n | +|user |SELECT user(); |PASSED |OK |std_err: SQL error: ParserError("Expected end of statement, found: (")\n | +|uuid |SELECT uuid(); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback | | +|var_pop |SELECT var_pop(col) FROM VALUES (1), (2), (3) AS tab(col); |PASSED |OK |OK | +|var_samp |SELECT var_samp(col) FROM VALUES (1), (2), (3) AS tab(col); |PASSED |OK |OK | +|variance |SELECT variance(col) FROM VALUES (1), (2), (3) AS tab(col); |PASSED |OK |std_out: DataFusion CLI v38.0.0\nError: Error during planning: Invalid function 'variance'.\nDid you mean 'range'?\n | +|version |SELECT version(); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'version'.\nDid you mean 'asin'?\n | +|weekday |SELECT weekday('2009-07-30'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'weekday'.\nDid you mean 'overlay'?\n | +|weekofyear |SELECT weekofyear('2008-02-20'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 
'weekofyear'.\nDid you mean 'power'?\n | +|when |SELECT CASE WHEN 1 > 0 THEN 1 WHEN 2 > 0 THEN 2.0 ELSE 1.2 END; |FAILED |\n[PARSE_SYNTAX_ERROR] Syntax error at or near 'then'.(line 1, pos 18)\n\n== SQL ==\nselect case a > b then c else d end from tbl\n------------------^^^\n | | +|width_bucket |SELECT width_bucket(5.3, 0.2, 10.6, 5); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'width_bucket'.\nDid you mean 'list_cat'?\n | +|xpath |SELECT xpath('b1b2b3c1c2','a/b/text()'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'xpath'.\nDid you mean 'atanh'?\n | +|xpath_boolean |SELECT xpath_boolean('1','a/b'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'xpath_boolean'.\nDid you mean 'date_bin'?\n | +|xpath_double |SELECT xpath_double('12', 'sum(a/b)'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'xpath_double'.\nDid you mean 'date_format'?\n | +|xpath_float |SELECT xpath_float('12', 'sum(a/b)'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'xpath_float'.\nDid you mean 'date_format'?\n | +|xpath_int |SELECT xpath_int('12', 'sum(a/b)'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'xpath_int'.\nDid you mean 'date_bin'?\n | +|xpath_long |SELECT xpath_long('12', 'sum(a/b)'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'xpath_long'.\nDid you mean 'date_bin'?\n | +|xpath_number |SELECT xpath_number('12', 'sum(a/b)'); |FAILED |Unsupported: Expected only Comet native 
operators but found Spark fallback |std_err: Error during planning: Invalid function 'xpath_number'.\nDid you mean 'date_bin'?\n | +|xpath_short |SELECT xpath_short('12', 'sum(a/b)'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'xpath_short'.\nDid you mean 'list_sort'?\n | +|xpath_string |SELECT xpath_string('bcc','a/c'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'xpath_string'.\nDid you mean 'date_trunc'?\n | +|xxhash64 |SELECT xxhash64('Spark', array(123), 2); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: SQL error: ParserError("Expected SELECT, VALUES, or a subquery in the query body, found: 123")\n | +|year |SELECT year('2016-07-30'); |FAILED |Unsupported: Expected only Comet native operators but found Spark fallback |std_err: Error during planning: Invalid function 'year'.\nDid you mean 'VAR'?\n | +|zip_with |SELECT zip_with(array(1, 2, 3), array('a', 'b', 'c'), (x, y) -> (y, x)); |FAILED |[WRONG_NUM_ARGS.WITHOUT_SUGGESTION] The `zip_with` requires 3 parameters but the actual number is 2. 
Please, refer to 'https://spark.apache.org/docs/latest/sql-ref-functions.html' for a fix.; line 1 pos 7 |std_err: SQL error: ParserError("Expected SELECT, VALUES, or a subquery in the query body, found: 1")\n | +|| |SELECT 3 | 5; |PASSED |OK |OK | +|~ |SELECT ~ 0; |PASSED |OK |std_err: SQL error: ParserError("Expected an expression:, found: ~")\n | ++---------------------------+-------------------------------------------------------------------------------------------------------------------------------------------------------+-------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ diff --git a/docs/spark_expressions_support.md b/docs/spark_expressions_support.md new file mode 100644 index 000000000..8fb975862 --- /dev/null +++ b/docs/spark_expressions_support.md @@ -0,0 +1,475 @@ + + +# Supported Spark Expressions + +### agg_funcs + - [x] any + - [x] any_value + - [ ] approx_count_distinct + - [ ] approx_percentile + - [ ] array_agg + - [x] avg + - [x] bit_and + - [x] bit_or + - [x] bit_xor + - [x] bool_and + - [x] bool_or + - [ ] collect_list + - [ ] collect_set + - [ ] corr + - [x] count + - [x] count_if + - [ ] count_min_sketch + - [x] covar_pop + - [x] covar_samp + - [x] every + - [x] first + - [x] first_value + - [ ] grouping + - [ ] grouping_id + - [ ] histogram_numeric + - [ ] kurtosis + - [x] last + - [x] last_value + - [x] max + - [ ] max_by + - [x] 
mean + - [ ] median + - [x] min + - [ ] min_by + - [ ] mode + - [ ] percentile + - [ ] percentile_approx + - [x] regr_avgx + - [x] regr_avgy + - [x] regr_count + - [ ] regr_intercept + - [ ] regr_r2 + - [ ] regr_slope + - [ ] regr_sxx + - [ ] regr_sxy + - [ ] regr_syy + - [ ] skewness + - [x] some + - [x] std + - [x] stddev + - [x] stddev_pop + - [x] stddev_samp + - [x] sum + - [ ] try_avg + - [ ] try_sum + - [x] var_pop + - [x] var_samp + - [x] variance + +### array_funcs + - [ ] array + - [ ] array_append + - [ ] array_compact + - [ ] array_contains + - [ ] array_distinct + - [ ] array_except + - [ ] array_insert + - [ ] array_intersect + - [ ] array_join + - [ ] array_max + - [ ] array_min + - [ ] array_position + - [ ] array_remove + - [ ] array_repeat + - [ ] array_union + - [ ] arrays_overlap + - [ ] arrays_zip + - [ ] flatten + - [ ] get + - [ ] sequence + - [ ] shuffle + - [ ] slice + - [ ] sort_array + +### bitwise_funcs + - [x] & + - [x] ^ + - [ ] bit_count + - [ ] bit_get + - [ ] getbit + - [x] shiftright + - [ ] shiftrightunsigned + - [x] | + - [x] ~ + +### collection_funcs + - [ ] array_size + - [ ] cardinality + - [ ] concat + - [x] reverse + - [ ] size + +### conditional_funcs + - [x] coalesce + - [x] if + - [x] ifnull + - [ ] nanvl + - [x] nullif + - [x] nvl + - [x] nvl2 + - [ ] when + +### conversion_funcs + - [ ] bigint + - [ ] binary + - [ ] boolean + - [ ] cast + - [ ] date + - [ ] decimal + - [ ] double + - [ ] float + - [ ] int + - [ ] smallint + - [ ] string + - [ ] timestamp + - [ ] tinyint + +### csv_funcs + - [ ] from_csv + - [ ] schema_of_csv + - [ ] to_csv + +### datetime_funcs + - [ ] add_months + - [ ] convert_timezone + - [x] curdate + - [x] current_date + - [ ] current_timestamp + - [x] current_timezone + - [ ] date_add + - [ ] date_diff + - [ ] date_format + - [ ] date_from_unix_date + - [x] date_part + - [ ] date_sub + - [ ] date_trunc + - [ ] dateadd + - [ ] datediff + - [x] datepart + - [ ] day + - [ ] dayofmonth + - [ ] 
dayofweek + - [ ] dayofyear + - [x] extract + - [ ] from_unixtime + - [ ] from_utc_timestamp + - [ ] hour + - [ ] last_day + - [ ] localtimestamp + - [ ] make_date + - [ ] make_dt_interval + - [ ] make_interval + - [ ] make_timestamp + - [ ] make_timestamp_ltz + - [ ] make_timestamp_ntz + - [ ] make_ym_interval + - [ ] minute + - [ ] month + - [ ] months_between + - [ ] next_day + - [ ] now + - [ ] quarter + - [ ] second + - [ ] timestamp_micros + - [ ] timestamp_millis + - [ ] timestamp_seconds + - [ ] to_date + - [ ] to_timestamp + - [ ] to_timestamp_ltz + - [ ] to_timestamp_ntz + - [ ] to_unix_timestamp + - [ ] to_utc_timestamp + - [ ] trunc + - [ ] try_to_timestamp + - [ ] unix_date + - [ ] unix_micros + - [ ] unix_millis + - [ ] unix_seconds + - [ ] unix_timestamp + - [ ] weekday + - [ ] weekofyear + - [ ] year + +### generator_funcs + - [ ] explode + - [ ] explode_outer + - [ ] inline + - [ ] inline_outer + - [ ] posexplode + - [ ] posexplode_outer + - [ ] stack + +### hash_funcs + - [ ] crc32 + - [ ] hash + - [x] md5 + - [ ] sha + - [ ] sha1 + - [ ] sha2 + - [ ] xxhash64 + +### json_funcs + - [ ] from_json + - [ ] get_json_object + - [ ] json_array_length + - [ ] json_object_keys + - [ ] json_tuple + - [ ] schema_of_json + - [ ] to_json + +### lambda_funcs + - [ ] aggregate + - [ ] array_sort + - [ ] exists + - [ ] filter + - [ ] forall + - [ ] map_filter + - [ ] map_zip_with + - [ ] reduce + - [ ] transform + - [ ] transform_keys + - [ ] transform_values + - [ ] zip_with + +### map_funcs + - [ ] element_at + - [ ] map + - [ ] map_concat + - [ ] map_contains_key + - [ ] map_entries + - [ ] map_from_arrays + - [ ] map_from_entries + - [ ] map_keys + - [ ] map_values + - [ ] str_to_map + - [ ] try_element_at + +### math_funcs + - [x] % + - [x] * + - [x] + + - [x] - + - [x] / + - [x] abs + - [x] acos + - [ ] acosh + - [x] asin + - [ ] asinh + - [x] atan + - [x] atan2 + - [ ] atanh + - [ ] bin + - [ ] bround + - [ ] cbrt + - [x] ceil + - [x] ceiling + - [ ] conv 
+ - [x] cos + - [ ] cosh + - [ ] cot + - [ ] csc + - [ ] degrees + - [ ] div + - [ ] e + - [x] exp + - [ ] expm1 + - [ ] factorial + - [x] floor + - [ ] greatest + - [ ] hex + - [ ] hypot + - [ ] least + - [x] ln + - [ ] log + - [x] log10 + - [ ] log1p + - [x] log2 + - [x] mod + - [x] negative + - [ ] pi + - [ ] pmod + - [x] positive + - [x] pow + - [x] power + - [ ] radians + - [ ] rand + - [ ] randn + - [ ] random + - [ ] rint + - [x] round + - [ ] sec + - [x] shiftleft + - [x] sign + - [x] signum + - [x] sin + - [ ] sinh + - [x] sqrt + - [x] tan + - [ ] tanh + - [ ] try_add + - [x] try_divide + - [ ] try_multiply + - [x] try_subtract + - [x] unhex + - [ ] width_bucket + +### misc_funcs + - [ ] aes_decrypt + - [ ] aes_encrypt + - [ ] assert_true + - [x] current_catalog + - [x] current_database + - [x] current_schema + - [x] current_user + - [x] equal_null + - [ ] input_file_block_length + - [ ] input_file_block_start + - [ ] input_file_name + - [ ] monotonically_increasing_id + - [ ] raise_error + - [ ] spark_partition_id + - [ ] typeof + - [x] user + - [ ] uuid + - [ ] version + +### predicate_funcs + - [x] ! 
+ - [x] < + - [x] <= + - [x] <=> + - [x] = + - [x] == + - [x] > + - [x] >= + - [x] and + - [x] ilike + - [x] in + - [ ] isnan + - [x] isnotnull + - [x] isnull + - [x] like + - [x] not + - [x] or + - [ ] regexp + - [ ] regexp_like + - [ ] rlike + +### string_funcs + - [x] ascii + - [ ] base64 + - [x] bit_length + - [x] btrim + - [x] char + - [x] char_length + - [x] character_length + - [x] chr + - [x] concat_ws + - [x] contains + - [ ] decode + - [ ] elt + - [ ] encode + - [x] endswith + - [ ] find_in_set + - [ ] format_number + - [ ] format_string + - [x] initcap + - [x] instr + - [x] lcase + - [ ] left + - [x] len + - [x] length + - [ ] levenshtein + - [ ] locate + - [x] lower + - [ ] lpad + - [x] ltrim + - [ ] mask + - [x] octet_length + - [ ] overlay + - [ ] position + - [ ] printf + - [ ] regexp_count + - [ ] regexp_extract + - [ ] regexp_extract_all + - [ ] regexp_instr + - [ ] regexp_replace + - [ ] regexp_substr + - [x] repeat + - [x] replace + - [ ] right + - [ ] rpad + - [x] rtrim + - [ ] sentences + - [ ] soundex + - [x] space + - [ ] split + - [ ] split_part + - [x] startswith + - [ ] substr + - [ ] substring + - [ ] substring_index + - [ ] to_binary + - [ ] to_char + - [ ] to_number + - [x] translate + - [x] trim + - [ ] try_to_binary + - [ ] try_to_number + - [x] ucase + - [ ] unbase64 + - [x] upper + +### struct_funcs + - [ ] named_struct + - [ ] struct + +### url_funcs + - [ ] parse_url + - [ ] url_decode + - [ ] url_encode + +### window_funcs + - [ ] cume_dist + - [ ] dense_rank + - [ ] lag + - [ ] lead + - [ ] nth_value + - [ ] ntile + - [ ] percent_rank + - [ ] rank + - [ ] row_number + +### xml_funcs + - [ ] xpath + - [ ] xpath_boolean + - [ ] xpath_double + - [ ] xpath_float + - [ ] xpath_int + - [ ] xpath_long + - [ ] xpath_number + - [ ] xpath_short + - [ ] xpath_string \ No newline at end of file diff --git a/spark/src/test/scala/org/apache/comet/CometExpressionCoverageSuite.scala 
b/spark/src/test/scala/org/apache/comet/CometExpressionCoverageSuite.scala index 26c9c8fec..55d45a21b 100644 --- a/spark/src/test/scala/org/apache/comet/CometExpressionCoverageSuite.scala +++ b/spark/src/test/scala/org/apache/comet/CometExpressionCoverageSuite.scala @@ -27,12 +27,17 @@ import scala.sys.process._ import org.scalatest.Ignore import org.scalatest.exceptions.TestFailedException +import org.scalatest.matchers.should.Matchers.convertToAnyShouldWrapper import org.apache.hadoop.fs.Path -import org.apache.spark.sql.CometTestBase +import org.apache.spark.sql.{CometTestBase, DataFrame} +import org.apache.spark.sql.catalyst.analysis.UnresolvedFunction +import org.apache.spark.sql.catalyst.expressions.LambdaFunction +import org.apache.spark.sql.catalyst.plans.logical.Project import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper +import org.apache.spark.sql.functions.collect_list -import org.apache.comet.CoverageResultStatus.CoverageResultStatus +import org.apache.comet.CoverageResultStatus.{CoverageResultStatus, Passed} /** * Manual test to calculate Spark builtin expressions coverage support by the Comet @@ -46,23 +51,103 @@ class CometExpressionCoverageSuite extends CometTestBase with AdaptiveSparkPlanH private val projectDocFolder = "docs" private val rawCoverageFilePath = s"$projectDocFolder/spark_builtin_expr_coverage.txt" + private val mdCoverageFilePath = s"$projectDocFolder/spark_expressions_support.md" private val DATAFUSIONCLI_PATH_ENV_VAR = "DATAFUSIONCLI_PATH" - private val queryPattern = """(?i)SELECT (.+?);""".r private val valuesPattern = """(?i)FROM VALUES(.+?);""".r private val selectPattern = """(i?)SELECT(.+?)FROM""".r - def getExamples(): Map[String, List[String]] = + // exclude funcs Comet has no plans to support in the near future, + // such as Spark streaming functions and Java calls + private val outOfRoadmapFuncs = + List( + "window", + "session_window", + "window_time", + "java_method", + "reflect", + "current_catalog",
+ "current_user", + "current_schema", + "current_database") + // Spark Comet configuration to run the tests + private val sqlConf = Seq( + "spark.comet.exec.shuffle.enabled" -> "true", + "spark.sql.optimizer.excludedRules" -> "org.apache.spark.sql.catalyst.optimizer.ConstantFolding", + "spark.sql.adaptive.optimizer.excludedRules" -> "org.apache.spark.sql.catalyst.optimizer.ConstantFolding") + + // Tests to run manually because their syntax is unusual or nested + // This can be simplified once Comet supports MemoryScan; for now Comet is only triggered from the FileScan + // Once MemoryScan is supported we can just run the Spark examples as is + val manualTests: Map[String, (String, String)] = Map( + "!" -> ("select true a", "select ! true from tbl"), + "%" -> ("select 1 a, 2 b", "select a % b from tbl"), + "&" -> ("select 1 a, 2 b", "select a & b from tbl"), + "*" -> ("select 1 a, 2 b", "select a * b from tbl"), + "+" -> ("select 1 a, 2 b", "select a + b from tbl"), + "-" -> ("select 1 a, 2 b", "select a - b from tbl"), + "/" -> ("select 1 a, 2 b", "select a / b from tbl"), + "<" -> ("select 1 a, 2 b", "select a < b from tbl"), + "<=" -> ("select 1 a, 2 b", "select a <= b from tbl"), + "<=>" -> ("select 1 a, 2 b", "select a <=> b from tbl"), + "=" -> ("select 1 a, 2 b", "select a = b from tbl"), + "==" -> ("select 1 a, 2 b", "select a == b from tbl"), + ">" -> ("select 1 a, 2 b", "select a > b from tbl"), + ">=" -> ("select 1 a, 2 b", "select a >= b from tbl"), + "^" -> ("select 1 a, 2 b", "select a ^ b from tbl"), + "|" -> ("select 1 a, 2 b", "select a | b from tbl"), + "try_multiply" -> ("select 2000000 a, 30000000 b", "select try_multiply(a, b) from tbl"), + "try_add" -> ("select 2147483647 a, 1 b", "select try_add(a, b) from tbl"), + "try_subtract" -> ("select cast(-2147483647 as int) a, cast(1 as int) b", "select try_subtract(a, b) from tbl"), + "stack" -> ("select 1 a, 2 b", "select stack(1, a, b) from tbl"), + "~" -> ("select 1 a", "select ~ a from tbl"), + "unhex" ->
("select '537061726B2053514C' a", "select unhex(a) from tbl"), + "when" -> ("select 1 a, 2 b, 3 c, 4 d", "select case when a > b then c else d end from tbl"), + "case" -> ("select 1 a, 2 b, 3 c, 4 d", "select case a when 1 then c else d end from tbl"), + "transform_values" -> ("select array(1, 2, 3) a", "select transform_values(map_from_arrays(a, a), (k, v) -> v + 1) from tbl"), + "transform_keys" -> ("select array(1, 2, 3) a", "select transform_keys(map_from_arrays(a, a), (k, v) -> v + 1) from tbl"), + "transform" -> ("select array(1, 2, 3) a", "select transform(a, (k, v) -> v + 1) from tbl"), + "reduce" -> ("select array(1, 2, 3) a", "select reduce(a, 0, (acc, x) -> acc + x) from tbl"), + "struct" -> ("select 1 a, 2 b", "select struct(a, b) from tbl"), + "space" -> ("select 1 a", "select space(a) from tbl"), + "sort_array" -> ("select array('b', 'd', null, 'c', 'a') a", "select sort_array(a) from tbl"), + "or" -> ("select true a, false b", "select a or b from tbl"), + "overlay" -> ("select 'Spark SQL' a", "select overlay(a PLACING '_' FROM 6) from tbl"), + "nvl" -> ("select 1 a, cast(null as int) b", "select nvl(b, a) from tbl"), + "nvl2" -> ("select 1 a, cast(null as int) b, cast(null as int) c", "select nvl2(c, b, a) from tbl"), + "coalesce" -> ("select 1 a, cast(null as int) b, cast(null as int) c", "select coalesce(c, b, a) from tbl"), + "and" -> ("select true a, false b", "select a and b from tbl"), + "not" -> ("select true a", "select not a from tbl"), + "named_struct" -> ("select 1 a", "select named_struct('a', a) from tbl"), + "mod" -> ("select 1 a, 1 b", "select mod(b, a) from tbl"), + "div" -> ("select 1 a, 1 b", "select div(b, a) from tbl"), + "map_zip_with" -> ("select map(1, 'a', 2, 'b') a, map(1, 'x', 2, 'y') b", "SELECT map_zip_with(a, b, (k, v1, v2) -> concat(v1, v2)) from tbl"), + "map_filter" -> ("select map(1, 0, 2, 2, 3, -1) a", "SELECT map_filter(a, (k, v) -> k > v) from tbl"), + "in" -> ("select 1 a", "SELECT a in ('1', '2', '3') from tbl"), +
"ifnull" -> ("select 1 a, cast(null as int) b", "SELECT ifnull(b, a) from tbl"), + "from_json" -> ("select '{\"a\":1, \"b\":0.8}' a", "SELECT from_json(a, 'a INT, b DOUBLE') from tbl"), + "from_csv" -> ("select '1, 0.8' a", "SELECT from_csv(a, 'a INT, b DOUBLE') from tbl"), + "forall" -> ("select array(1, 2, 3) a", "SELECT forall(a, x -> x % 2 == 0) from tbl"), + "filter" -> ("select array(1, 2, 3) a", "SELECT filter(a, x -> x % 2 == 1) from tbl"), + "exists" -> ("select array(1, 2, 3) a", "SELECT exists(a, x -> x % 2 == 0) from tbl"), + "aggregate" -> ("select array(1, 2, 3) a", "SELECT aggregate(a, 0, (acc, x) -> acc + x) from tbl"), + "extract" -> ("select TIMESTAMP '2019-08-12 01:00:00.123456' a", "SELECT extract(YEAR FROM a) from tbl"), + "datepart" -> ("select TIMESTAMP '2019-08-12 01:00:00.123456' a", "SELECT datepart('YEAR', a) from tbl"), + "date_part" -> ("select TIMESTAMP '2019-08-12 01:00:00.123456' a", "SELECT date_part('YEAR', a) from tbl"), + "cast" -> ("select '10' a", "SELECT cast(a as int) from tbl"), + "aes_encrypt" -> ("select 'Spark' a, '0000111122223333' b", "SELECT aes_encrypt(a, b) from tbl")) + + // key - function info (name and group) + // value - examples + def getExamples(): Map[FunctionInfo, List[String]] = spark.sessionState.functionRegistry .listFunction() .map(spark.sessionState.catalog.lookupFunctionInfo(_)) .filter(_.getSource.toLowerCase == "built-in") - // exclude spark streaming functions, Comet has no plans to support streaming in near future - .filter(f => - !List("window", "session_window", "window_time").contains(f.getName.toLowerCase)) + .filter(f => !outOfRoadmapFuncs.contains(f.getName.toLowerCase)) .map(f => { val selectRows = queryPattern.findAllMatchIn(f.getExamples).map(_.group(0)).toList - (f.getName, selectRows.filter(_.nonEmpty)) + (FunctionInfo(f.getName, f.getGroup), selectRows.filter(_.nonEmpty)) }) .toMap @@ -80,7 +165,7 @@ class CometExpressionCoverageSuite extends CometTestBase with AdaptiveSparkPlanH val resultsMap = new
mutable.HashMap[String, CoverageResult]() builtinExamplesMap.foreach { - case (funcName, q :: _) => + case (func, q :: _) => var dfMessage: Option[String] = None val queryResult = try { @@ -99,7 +184,7 @@ class CometExpressionCoverageSuite extends CometTestBase with AdaptiveSparkPlanH dfMessage = runDatafusionCli(s"""$s '$path/*.parquet'""") } - testSingleLineQuery(s"select * $v", s"$s tbl") + testSingleLineQuery(s"select * $v", s"$s tbl", sqlConf = sqlConf) case _ => sys.error("Cannot parse properly") } @@ -112,19 +197,62 @@ class CometExpressionCoverageSuite extends CometTestBase with AdaptiveSparkPlanH // ConstantFolding is a operator optimization rule in Catalyst that replaces expressions // that can be statically evaluated with their equivalent literal values. dfMessage = runDatafusionCli(q) - testSingleLineQuery( - "select 'dummy' x", - s"${q.dropRight(1)}, x from tbl", - excludedOptimizerRules = - Some("org.apache.spark.sql.catalyst.optimizer.ConstantFolding")) + + manualTests.get(func.name) match { + // the test is manual query + case Some(test) => testSingleLineQuery(test._1, test._2, sqlConf = sqlConf) + case None => + // extract function arguments as a sql text + // example: + // cos(0) -> 0 + // explode_outer(array(10, 20)) -> array(10, 20) + val args = getSqlFunctionArguments(q.dropRight(1)) + val (aliased, aliases) = + if (Seq( + "bround", + "rlike", + "round", + "to_binary", + "to_char", + "to_number", + "try_to_binary", + "try_to_number", + "xpath", + "xpath_boolean", + "xpath_double", + "xpath_double", + "xpath_float", + "xpath_int", + "xpath_long", + "xpath_number", + "xpath_short", + "xpath_string").contains(func.name.toLowerCase)) { + // c0 column, c1 foldable literal(cannot be from column) + ( + Seq(s"${args.head} as c0").mkString(","), + Seq(s"c0, ${args(1)}").mkString(",")) + } else { + ( + args.zipWithIndex.map(x => s"${x._1} as c${x._2}").mkString(","), + args.zipWithIndex.map(x => s"c${x._2}").mkString(",")) + } + + val select = s"select 
${func.name}($aliases)" + + testSingleLineQuery( + s"select ${if (aliased.nonEmpty) aliased else 1}", + s"$select from tbl", + sqlConf = sqlConf) + } } + CoverageResult( q, CoverageResultStatus.Passed, CoverageResultDetails( cometMessage = "OK", - datafusionMessage = dfMessage.getOrElse("OK"))) - + datafusionMessage = dfMessage.getOrElse("OK")), + group = func.group) } catch { case e: TestFailedException if e.getMessage.contains("Expected only Comet native operators") => @@ -134,7 +262,18 @@ class CometExpressionCoverageSuite extends CometTestBase with AdaptiveSparkPlanH CoverageResultDetails( cometMessage = "Unsupported: Expected only Comet native operators but found Spark fallback", - datafusionMessage = dfMessage.getOrElse(""))) + datafusionMessage = dfMessage.getOrElse("")), + group = func.group) + + case e: TestFailedException + if e.getMessage.contains("Results do not match for query") => + CoverageResult( + q, + CoverageResultStatus.Failed, + CoverageResultDetails( + cometMessage = "Unsupported: Results do not match for query", + datafusionMessage = dfMessage.getOrElse("")), + group = func.group) case e if e.getMessage.contains("CometNativeException") => CoverageResult( @@ -142,7 +281,8 @@ class CometExpressionCoverageSuite extends CometTestBase with AdaptiveSparkPlanH CoverageResultStatus.Failed, CoverageResultDetails( cometMessage = "Failed on native side: found CometNativeException", - datafusionMessage = dfMessage.getOrElse(""))) + datafusionMessage = dfMessage.getOrElse("")), + group = func.group) case e => CoverageResult( @@ -150,23 +290,24 @@ class CometExpressionCoverageSuite extends CometTestBase with AdaptiveSparkPlanH CoverageResultStatus.Failed, CoverageResultDetails( cometMessage = e.getMessage, - datafusionMessage = dfMessage.getOrElse(""))) + datafusionMessage = dfMessage.getOrElse("")), + group = func.group) } - resultsMap.put(funcName, queryResult) + resultsMap.put(func.name, queryResult) // Function with no examples - case (funcName, 
List()) => + case (func, List()) => resultsMap.put( - funcName, + func.name, CoverageResult( "", CoverageResultStatus.Skipped, CoverageResultDetails( cometMessage = "No examples found in spark.sessionState.functionRegistry", - datafusionMessage = ""))) + datafusionMessage = ""), + group = func.group)) } - // TODO: convert results into HTML or .md file resultsMap.toSeq.toDF("name", "details").createOrReplaceTempView("t") val str = showString( @@ -175,6 +316,113 @@ class CometExpressionCoverageSuite extends CometTestBase with AdaptiveSparkPlanH 1000, 0) Files.write(Paths.get(rawCoverageFilePath), str.getBytes(StandardCharsets.UTF_8)) + Files.write( + Paths.get(mdCoverageFilePath), + generateMarkdown(spark.sql("select * from t")).getBytes(StandardCharsets.UTF_8)) + } + + test("Test markdown") { + val map = new scala.collection.mutable.HashMap[String, CoverageResult]() + map.put( + "f1", + CoverageResult("q1", CoverageResultStatus.Passed, CoverageResultDetails("", ""), "group1")) + map.put( + "f2", + CoverageResult( + "q2", + CoverageResultStatus.Failed, + CoverageResultDetails("err", "err"), + "group1")) + map.put( + "f3", + CoverageResult("q3", CoverageResultStatus.Passed, CoverageResultDetails("", ""), "group2")) + map.put( + "f4", + CoverageResult( + "q4", + CoverageResultStatus.Failed, + CoverageResultDetails("err", "err"), + "group2")) + map.put( + "f5", + CoverageResult("q5", CoverageResultStatus.Passed, CoverageResultDetails("", ""), "group3")) + val str = generateMarkdown(map.toSeq.toDF("name", "details")) + str shouldBe s"${getLicenseHeader()}\n# Supported Spark Expressions\n\n### group1\n - [x] f1\n - [ ] f2\n\n### group2\n - [x] f3\n - [ ] f4\n\n### group3\n - [x] f5" + } + + test("get sql function arguments") { + getSqlFunctionArguments( + "SELECT unix_seconds(TIMESTAMP('1970-01-01 00:00:01Z'))") shouldBe Seq( + "TIMESTAMP('1970-01-01 00:00:01Z')") + getSqlFunctionArguments("SELECT decode(unhex('537061726B2053514C'), 'UTF-8')") shouldBe Seq( + 
"unhex('537061726B2053514C')", + "'UTF-8'") + getSqlFunctionArguments( + "SELECT extract(YEAR FROM TIMESTAMP '2019-08-12 01:00:00.123456')") shouldBe Seq( + "'YEAR'", + "TIMESTAMP '2019-08-12 01:00:00.123456'") + getSqlFunctionArguments("SELECT exists(array(1, 2, 3), x -> x % 2 == 0)") shouldBe Seq( + "array(1, 2, 3)") + getSqlFunctionArguments("select to_char(454, '999')") shouldBe Seq("454", "'999'") + } + + def getSqlFunctionArguments(sql: String): Seq[String] = { + val plan = spark.sessionState.sqlParser.parsePlan(sql) + plan match { + case Project(projectList, _) => + // unwrap projection to get first expression arguments + // assuming first expression is Unresolved function + val projection = projectList.head.children.head.asInstanceOf[UnresolvedFunction].arguments + projection.filter(!_.isInstanceOf[LambdaFunction]).map(_.sql) + } + } + + def generateMarkdown(df: DataFrame): String = { + val groupedDF = df + .orderBy("name") + .groupBy("details.group") + .agg(collect_list("name").as("names"), collect_list("details.result").as("statuses")) + .orderBy("group") + val sb = new StringBuilder(s"${getLicenseHeader()}\n# Supported Spark Expressions") + groupedDF.collect().foreach { row => + val groupName = row.getAs[String]("group") + val names = row.getAs[Seq[String]]("names") + val statuses = row.getAs[Seq[String]]("statuses") + + val passedMarks = names + .zip(statuses) + .map(x => + x._2 match { + case s if s == Passed.toString => s" - [x] ${x._1}" + case _ => s" - [ ] ${x._1}" + }) + + sb.append(s"\n\n### $groupName\n" + passedMarks.mkString("\n")) + } + + sb.result() + } + + private def getLicenseHeader(): String = { + """ + |""".stripMargin } // Returns execution error, None means successful execution @@ -224,6 +472,9 @@ object CoverageResultStatus extends Enumeration { case class CoverageResult( query: String, result: CoverageResultStatus, - details: CoverageResultDetails) + details: CoverageResultDetails, + group: String) case class 
CoverageResultDetails(cometMessage: String, datafusionMessage: String) + +case class FunctionInfo(name: String, group: String) diff --git a/spark/src/test/scala/org/apache/spark/sql/CometTestBase.scala b/spark/src/test/scala/org/apache/spark/sql/CometTestBase.scala index d8c82f12b..ddeb4fb50 100644 --- a/spark/src/test/scala/org/apache/spark/sql/CometTestBase.scala +++ b/spark/src/test/scala/org/apache/spark/sql/CometTestBase.scala @@ -787,7 +787,7 @@ abstract class CometTestBase testQuery: String, testName: String = "test", tableName: String = "tbl", - excludedOptimizerRules: Option[String] = None): Unit = { + sqlConf: Seq[(String, String)] = Seq.empty): Unit = { withTempDir { dir => val path = new Path(dir.toURI.toString, testName).toUri.toString @@ -803,9 +803,7 @@ abstract class CometTestBase spark.createDataFrame(data, schema).repartition(1).write.parquet(path) readParquetFile(path, Some(schema)) { df => df.createOrReplaceTempView(tableName) } - withSQLConf( - "spark.sql.optimizer.excludedRules" -> excludedOptimizerRules.getOrElse(""), - "spark.sql.adaptive.optimizer.excludedRules" -> excludedOptimizerRules.getOrElse("")) { + withSQLConf(sqlConf: _*) { checkSparkAnswerAndOperator(sql(testQuery)) } } From 7ab37ebdac1e077e157e292fbddf7ca23e78429c Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Wed, 5 Jun 2024 15:31:16 -0600 Subject: [PATCH 3/6] simplify code in CometExecIterator and avoid some small overhead (#522) --- .../org/apache/comet/CometExecIterator.scala | 41 +++++++------------ 1 file changed, 14 insertions(+), 27 deletions(-) diff --git a/spark/src/main/scala/org/apache/comet/CometExecIterator.scala b/spark/src/main/scala/org/apache/comet/CometExecIterator.scala index b3604c9e0..89225c0d6 100644 --- a/spark/src/main/scala/org/apache/comet/CometExecIterator.scala +++ b/spark/src/main/scala/org/apache/comet/CometExecIterator.scala @@ -67,20 +67,6 @@ class CometExecIterator( private var currentBatch: ColumnarBatch = null private var closed: Boolean = 
false - private def executeNative(): ExecutionState = { - val result = nativeLib.executePlan(plan) - - val flag = result(0) - if (flag == -1) EOF - else if (flag == 1) { - val numRows = result(1) - val addresses = result.slice(2, result.length) - Batch(numRows = numRows.toInt, addresses = addresses) - } else { - throw new IllegalStateException(s"Invalid native flag: $flag") - } - } - /** * Creates a new configuration map to be passed to the native side. */ @@ -110,21 +96,22 @@ class CometExecIterator( result } - /** Execution result from Comet native */ - trait ExecutionState - - /** A new batch is available */ - case class Batch(numRows: Int, addresses: Array[Long]) extends ExecutionState - - /** The execution is finished - no more batch */ - case object EOF extends ExecutionState - def getNextBatch(): Option[ColumnarBatch] = { - executeNative() match { - case EOF => None - case Batch(numRows, addresses) => + // we execute the native plan each time we need another output batch and this could + // result in multiple input batches being processed + val result = nativeLib.executePlan(plan) + + result(0) match { + case -1 => + // EOF + None + case 1 => + val numRows = result(1) + val addresses = result.slice(2, result.length) val cometVectors = nativeUtil.importVector(addresses) - Some(new ColumnarBatch(cometVectors.toArray, numRows)) + Some(new ColumnarBatch(cometVectors.toArray, numRows.toInt)) + case flag => + throw new IllegalStateException(s"Invalid native flag: $flag") } } From c819bc0b0d3d1c98e6b36fcafcf184f5bb4b2c2c Mon Sep 17 00:00:00 2001 From: Semyon Date: Thu, 6 Jun 2024 00:13:04 +0200 Subject: [PATCH 4/6] Small changes in docs (#512) ## Which issue does this PR close? Closes #503 Closes #191 ## Rationale for this change 1. Provide a way to build Comet from the source on an isolated environments with an access to github.com 2. Update documentation in part, related to compatibility of Spark AQE and Comet Shuffle ## What changes are included in this PR? 
- Update tuning section about the compatibility of Shuffle and Spark AQE - Add `release-nogit` for building on an isolated environments - Update docs in the section about an installation process Changes to be committed: modified: Makefile modified: docs/source/user-guide/installation.md modified: docs/source/user-guide/tuning.md ## How are these changes tested? I run both `make release` and `make release-nogit`. The first one created properties file in `common/target/classes` but the second did not. The flag `-Dmaven.gitcommitid.skip=true` is described in [this comment](https://github.com/git-commit-id/git-commit-id-maven-plugin/issues/392#issuecomment-432309487). --- Makefile | 3 +++ docs/source/user-guide/installation.md | 6 ++++++ docs/source/user-guide/tuning.md | 2 ++ 3 files changed, 11 insertions(+) diff --git a/Makefile b/Makefile index b9b9707ba..573a7f955 100644 --- a/Makefile +++ b/Makefile @@ -77,6 +77,9 @@ release-linux: clean release: cd core && RUSTFLAGS="-Ctarget-cpu=native" cargo build --release ./mvnw install -Prelease -DskipTests $(PROFILES) +release-nogit: + cd core && RUSTFLAGS="-Ctarget-cpu=native" cargo build --features nightly --release + ./mvnw install -Prelease -DskipTests $(PROFILES) -Dmaven.gitcommitid.skip=true benchmark-%: clean release cd spark && COMET_CONF_DIR=$(shell pwd)/conf MAVEN_OPTS='-Xmx20g' ../mvnw exec:java -Dexec.mainClass="$*" -Dexec.classpathScope="test" -Dexec.cleanupDaemonThreads="false" -Dexec.args="$(filter-out $@,$(MAKECMDGOALS))" $(PROFILES) .DEFAULT: diff --git a/docs/source/user-guide/installation.md b/docs/source/user-guide/installation.md index 03ecc53ed..7335a488c 100644 --- a/docs/source/user-guide/installation.md +++ b/docs/source/user-guide/installation.md @@ -57,6 +57,12 @@ Note that the project builds for Scala 2.12 by default but can be built for Scal make release PROFILES="-Pspark-3.4 -Pscala-2.13" ``` +To build Comet from the source distribution on an isolated environment without an access to 
`github.com` it is necessary to disable `git-commit-id-maven-plugin`; otherwise the build fails with errors because git is not accessible during the build process. In that case you may use: + +```console +make release-nogit PROFILES="-Pspark-3.4" +``` + ## Run Spark Shell with Comet enabled Make sure `SPARK_HOME` points to the same Spark version as Comet was built for. diff --git a/docs/source/user-guide/tuning.md b/docs/source/user-guide/tuning.md index 5a3100bd0..f46ab9e0e 100644 --- a/docs/source/user-guide/tuning.md +++ b/docs/source/user-guide/tuning.md @@ -39,6 +39,8 @@ It must be set before the Spark context is created. You can enable or disable Co at runtime by setting `spark.comet.exec.shuffle.enabled` to `true` or `false`. Once it is disabled, Comet will fallback to the default Spark shuffle manager. +> **_NOTE:_** At the moment, Comet Shuffle is not compatible with Spark AQE partition coalescing. To disable it, set `spark.sql.adaptive.coalescePartitions.enabled` to `false`. + ### Shuffle Mode Comet provides three shuffle modes: Columnar Shuffle, Native Shuffle and Auto Mode. 
From 23e0801bc035c33313b113c8f4c94f2ea21e4187 Mon Sep 17 00:00:00 2001 From: advancedxy Date: Thu, 6 Jun 2024 10:03:35 +0800 Subject: [PATCH 5/6] chore: Upgrade spark to 4.0.0-preview1 (#526) --- .github/workflows/pr_build.yml | 27 --------------------------- pom.xml | 2 +- 2 files changed, 1 insertion(+), 28 deletions(-) diff --git a/.github/workflows/pr_build.yml b/.github/workflows/pr_build.yml index 503978fc5..2bf023357 100644 --- a/.github/workflows/pr_build.yml +++ b/.github/workflows/pr_build.yml @@ -97,15 +97,6 @@ jobs: with: rust-version: ${{env.RUST_VERSION}} jdk-version: ${{ matrix.java_version }} - - name: Clone Spark - uses: actions/checkout@v4 - with: - repository: "apache/spark" - path: "apache-spark" - - name: Install Spark - shell: bash - working-directory: ./apache-spark - run: build/mvn install -Phive -Phadoop-cloud -DskipTests - name: Java test steps uses: ./.github/actions/java-test with: @@ -223,15 +214,6 @@ jobs: with: rust-version: ${{env.RUST_VERSION}} jdk-version: ${{ matrix.java_version }} - - name: Clone Spark - uses: actions/checkout@v4 - with: - repository: "apache/spark" - path: "apache-spark" - - name: Install Spark - shell: bash - working-directory: ./apache-spark - run: build/mvn install -Phive -Phadoop-cloud -DskipTests - name: Java test steps uses: ./.github/actions/java-test with: @@ -261,15 +243,6 @@ jobs: jdk-version: ${{ matrix.java_version }} jdk-architecture: aarch64 protoc-architecture: aarch_64 - - name: Clone Spark - uses: actions/checkout@v4 - with: - repository: "apache/spark" - path: "apache-spark" - - name: Install Spark - shell: bash - working-directory: ./apache-spark - run: build/mvn install -Phive -Phadoop-cloud -DskipTests - name: Java test steps uses: ./.github/actions/java-test with: diff --git a/pom.xml b/pom.xml index 8c322bae0..34d949e14 100644 --- a/pom.xml +++ b/pom.xml @@ -540,7 +540,7 @@ under the License. 
2.13.13 2.13 - 4.0.0-SNAPSHOT + 4.0.0-preview1 4.0 1.13.1 spark-4.0 From 6143e7a9973521844fb7e898a0f22a9c185972bc Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Wed, 5 Jun 2024 20:52:50 -0700 Subject: [PATCH 6/6] chore: Add UnboundColumn to carry datatype for unbound reference (#518) * chore: Add UnboundColumn to carry datatype for unbound reference * Update core/src/execution/datafusion/expressions/unbound.rs --- .../execution/datafusion/expressions/mod.rs | 1 + .../datafusion/expressions/unbound.rs | 110 ++++++++++++++++++ core/src/execution/datafusion/planner.rs | 11 +- 3 files changed, 120 insertions(+), 2 deletions(-) create mode 100644 core/src/execution/datafusion/expressions/unbound.rs diff --git a/core/src/execution/datafusion/expressions/mod.rs b/core/src/execution/datafusion/expressions/mod.rs index 084fef2df..05230b4c2 100644 --- a/core/src/execution/datafusion/expressions/mod.rs +++ b/core/src/execution/datafusion/expressions/mod.rs @@ -36,5 +36,6 @@ pub mod strings; pub mod subquery; pub mod sum_decimal; pub mod temporal; +pub mod unbound; mod utils; pub mod variance; diff --git a/core/src/execution/datafusion/expressions/unbound.rs b/core/src/execution/datafusion/expressions/unbound.rs new file mode 100644 index 000000000..5387b1012 --- /dev/null +++ b/core/src/execution/datafusion/expressions/unbound.rs @@ -0,0 +1,110 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use crate::execution::datafusion::expressions::utils::down_cast_any_ref; +use arrow_array::RecordBatch; +use arrow_schema::{DataType, Schema}; +use datafusion::physical_plan::ColumnarValue; +use datafusion_common::{internal_err, Result}; +use datafusion_physical_expr::PhysicalExpr; +use std::{ + any::Any, + hash::{Hash, Hasher}, + sync::Arc, +}; + +/// This is similar to `UnKnownColumn` in DataFusion, but it has data type. +/// This is only used when the column is not bound to a schema, for example, the +/// inputs to aggregation functions in final aggregation. In the case, we cannot +/// bind the aggregation functions to the input schema which is grouping columns +/// and aggregate buffer attributes in Spark (DataFusion has different design). +/// But when creating certain aggregation functions, we need to know its input +/// data types. As `UnKnownColumn` doesn't have data type, we implement this +/// `UnboundColumn` to carry the data type. 
+#[derive(Debug, Hash, PartialEq, Eq, Clone)] +pub struct UnboundColumn { + name: String, + datatype: DataType, +} + +impl UnboundColumn { + /// Create a new unbound column expression + pub fn new(name: &str, datatype: DataType) -> Self { + Self { + name: name.to_owned(), + datatype, + } + } + + /// Get the column name + pub fn name(&self) -> &str { + &self.name + } +} + +impl std::fmt::Display for UnboundColumn { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + write!(f, "{}, datatype: {}", self.name, self.datatype) + } +} + +impl PhysicalExpr for UnboundColumn { + /// Return a reference to Any that can be used for downcasting + fn as_any(&self) -> &dyn std::any::Any { + self + } + + /// Get the data type of this expression, given the schema of the input + fn data_type(&self, _input_schema: &Schema) -> Result { + Ok(self.datatype.clone()) + } + + /// Decide whether this expression is nullable, given the schema of the input + fn nullable(&self, _input_schema: &Schema) -> Result { + Ok(true) + } + + /// Evaluate the expression + fn evaluate(&self, _batch: &RecordBatch) -> Result { + internal_err!("UnboundColumn::evaluate() should not be called") + } + + fn children(&self) -> Vec> { + vec![] + } + + fn with_new_children( + self: Arc, + _children: Vec>, + ) -> Result> { + Ok(self) + } + + fn dyn_hash(&self, state: &mut dyn Hasher) { + let mut s = state; + self.hash(&mut s); + } +} + +impl PartialEq for UnboundColumn { + fn eq(&self, other: &dyn Any) -> bool { + down_cast_any_ref(other) + .downcast_ref::() + .map(|x| self == x) + .unwrap_or(false) + } +} diff --git a/core/src/execution/datafusion/planner.rs b/core/src/execution/datafusion/planner.rs index a5bcf5654..7af5f6838 100644 --- a/core/src/execution/datafusion/planner.rs +++ b/core/src/execution/datafusion/planner.rs @@ -33,7 +33,7 @@ use datafusion::{ expressions::{ in_list, BinaryExpr, BitAnd, BitOr, BitXor, CaseExpr, CastExpr, Column, Count, FirstValue, InListExpr, IsNotNullExpr, 
IsNullExpr, LastValue, - Literal as DataFusionLiteral, Max, Min, NotExpr, Sum, UnKnownColumn, + Literal as DataFusionLiteral, Max, Min, NotExpr, Sum, }, AggregateExpr, PhysicalExpr, PhysicalSortExpr, ScalarFunctionExpr, }, @@ -78,6 +78,7 @@ use crate::{ subquery::Subquery, sum_decimal::SumDecimal, temporal::{DateTruncExec, HourExec, MinuteExec, SecondExec, TimestampTruncExec}, + unbound::UnboundColumn, variance::Variance, NormalizeNaNAndZero, }, @@ -239,7 +240,13 @@ impl PhysicalPlanner { let field = input_schema.field(idx); Ok(Arc::new(Column::new(field.name().as_str(), idx))) } - ExprStruct::Unbound(unbound) => Ok(Arc::new(UnKnownColumn::new(unbound.name.as_str()))), + ExprStruct::Unbound(unbound) => { + let data_type = to_arrow_datatype(unbound.datatype.as_ref().unwrap()); + Ok(Arc::new(UnboundColumn::new( + unbound.name.as_str(), + data_type, + ))) + } ExprStruct::IsNotNull(is_notnull) => { let child = self.create_expr(is_notnull.child.as_ref().unwrap(), input_schema)?; Ok(Arc::new(IsNotNullExpr::new(child)))