build: Enable spark-4.0 Spark tests

kazuyukitanimura · Jun 11, 2024 · 8ce4344 · 8ce4344
1 parent 9cf702d
commit 8ce4344
Showing 1 changed file with 100 additions and 8 deletions.
diff --git a/dev/diffs/4.0.0-preview1.diff b/dev/diffs/4.0.0-preview1.diff
@@ -415,7 +415,7 @@ index 16a493b5290..3f0b70e2d59 100644
      assert(exchanges.size == 2)
    }
 diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DynamicPartitionPruningSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DynamicPartitionPruningSuite.scala
-index 2c24cc7d570..d9564511e2d 100644
+index 2c24cc7d570..50a2ce86117 100644
 --- a/sql/core/src/test/scala/org/apache/spark/sql/DynamicPartitionPruningSuite.scala
 +++ b/sql/core/src/test/scala/org/apache/spark/sql/DynamicPartitionPruningSuite.scala
 @@ -22,6 +22,7 @@ import org.scalatest.GivenWhenThen
@@ -436,7 +436,37 @@ index 2c24cc7d570..d9564511e2d 100644
        case _ => Nil
      }
    }
-@@ -1187,7 +1191,8 @@ abstract class DynamicPartitionPruningSuiteBase
+@@ -665,7 +669,8 @@ abstract class DynamicPartitionPruningSuiteBase
+     }
+   }
+
+-  test("partition pruning in broadcast hash joins with aliases") {
++  test("partition pruning in broadcast hash joins with aliases",
++    IgnoreComet("TODO: https://github.com/apache/datafusion-comet/issues/551")) {
+     Given("alias with simple join condition, using attribute names only")
+     withSQLConf(SQLConf.DYNAMIC_PARTITION_PRUNING_REUSE_BROADCAST_ONLY.key -> "true") {
+       val df = sql(
+@@ -755,7 +760,8 @@ abstract class DynamicPartitionPruningSuiteBase
+     }
+   }
+
+-  test("partition pruning in broadcast hash joins") {
++  test("partition pruning in broadcast hash joins",
++    IgnoreComet("TODO: https://github.com/apache/datafusion-comet/issues/551")) {
+     Given("disable broadcast pruning and disable subquery duplication")
+     withSQLConf(
+       SQLConf.DYNAMIC_PARTITION_PRUNING_REUSE_BROADCAST_ONLY.key -> "true",
+@@ -990,7 +996,8 @@ abstract class DynamicPartitionPruningSuiteBase
+     }
+   }
+
+-  test("different broadcast subqueries with identical children") {
++  test("different broadcast subqueries with identical children",
++    IgnoreComet("TODO: https://github.com/apache/datafusion-comet/issues/551")) {
+     withSQLConf(SQLConf.DYNAMIC_PARTITION_PRUNING_REUSE_BROADCAST_ONLY.key -> "true") {
+       withTable("fact", "dim") {
+         spark.range(100).select(
+@@ -1187,7 +1194,8 @@ abstract class DynamicPartitionPruningSuiteBase
      }
    }
 
@@ -446,7 +476,7 @@ index 2c24cc7d570..d9564511e2d 100644
      withSQLConf(SQLConf.DYNAMIC_PARTITION_PRUNING_REUSE_BROADCAST_ONLY.key -> "true") {
        val df = sql(
          """
-@@ -1238,7 +1243,8 @@ abstract class DynamicPartitionPruningSuiteBase
+@@ -1238,7 +1246,8 @@ abstract class DynamicPartitionPruningSuiteBase
      }
    }
 
@@ -456,7 +486,27 @@ index 2c24cc7d570..d9564511e2d 100644
      Given("dynamic pruning filter on the build side")
      withSQLConf(SQLConf.DYNAMIC_PARTITION_PRUNING_REUSE_BROADCAST_ONLY.key -> "true") {
        val df = sql(
-@@ -1486,7 +1492,7 @@ abstract class DynamicPartitionPruningSuiteBase
+@@ -1311,7 +1320,8 @@ abstract class DynamicPartitionPruningSuiteBase
+     }
+   }
+
+-  test("SPARK-32817: DPP throws error when the broadcast side is empty") {
++  test("SPARK-32817: DPP throws error when the broadcast side is empty",
++    IgnoreComet("TODO: https://github.com/apache/datafusion-comet/issues/551")) {
+     withSQLConf(
+       SQLConf.DYNAMIC_PARTITION_PRUNING_ENABLED.key -> "true",
+       SQLConf.DYNAMIC_PARTITION_PRUNING_REUSE_BROADCAST_ONLY.key -> "true",
+@@ -1471,7 +1481,8 @@ abstract class DynamicPartitionPruningSuiteBase
+     checkAnswer(df, Row(3, 2) :: Row(3, 2) :: Row(3, 2) :: Row(3, 2) :: Nil)
+   }
+
+-  test("SPARK-36444: Remove OptimizeSubqueries from batch of PartitionPruning") {
++  test("SPARK-36444: Remove OptimizeSubqueries from batch of PartitionPruning",
++    IgnoreComet("TODO: https://github.com/apache/datafusion-comet/issues/551")) {
+     withSQLConf(SQLConf.DYNAMIC_PARTITION_PRUNING_ENABLED.key -> "true") {
+       val df = sql(
+         """
+@@ -1486,7 +1497,7 @@ abstract class DynamicPartitionPruningSuiteBase
    }
 
    test("SPARK-38148: Do not add dynamic partition pruning if there exists static partition " +
@@ -465,7 +515,37 @@ index 2c24cc7d570..d9564511e2d 100644
      withSQLConf(SQLConf.DYNAMIC_PARTITION_PRUNING_ENABLED.key -> "true") {
        Seq(
          "f.store_id = 1" -> false,
-@@ -1730,6 +1736,8 @@ abstract class DynamicPartitionPruningV1Suite extends DynamicPartitionPruningDat
+@@ -1558,7 +1569,8 @@ abstract class DynamicPartitionPruningSuiteBase
+     }
+   }
+
+-  test("SPARK-38674: Remove useless deduplicate in SubqueryBroadcastExec") {
++  test("SPARK-38674: Remove useless deduplicate in SubqueryBroadcastExec",
++    IgnoreComet("TODO: https://github.com/apache/datafusion-comet/issues/551")) {
+     withTable("duplicate_keys") {
+       withSQLConf(SQLConf.DYNAMIC_PARTITION_PRUNING_ENABLED.key -> "true") {
+         Seq[(Int, String)]((1, "NL"), (1, "NL"), (3, "US"), (3, "US"), (3, "US"))
+@@ -1589,7 +1601,8 @@ abstract class DynamicPartitionPruningSuiteBase
+     }
+   }
+
+-  test("SPARK-39338: Remove dynamic pruning subquery if pruningKey's references is empty") {
++  test("SPARK-39338: Remove dynamic pruning subquery if pruningKey's references is empty",
++    IgnoreComet("TODO: https://github.com/apache/datafusion-comet/issues/551")) {
+     withSQLConf(SQLConf.DYNAMIC_PARTITION_PRUNING_ENABLED.key -> "true") {
+       val df = sql(
+         """
+@@ -1618,7 +1631,8 @@ abstract class DynamicPartitionPruningSuiteBase
+     }
+   }
+
+-  test("SPARK-39217: Makes DPP support the pruning side has Union") {
++  test("SPARK-39217: Makes DPP support the pruning side has Union",
++    IgnoreComet("TODO: https://github.com/apache/datafusion-comet/issues/551")) {
+     withSQLConf(SQLConf.DYNAMIC_PARTITION_PRUNING_ENABLED.key -> "true") {
+       val df = sql(
+         """
+@@ -1730,6 +1744,8 @@ abstract class DynamicPartitionPruningV1Suite extends DynamicPartitionPruningDat
                case s: BatchScanExec =>
                  // we use f1 col for v2 tables due to schema pruning
                  s.output.exists(_.exists(_.argString(maxFields = 100).contains("f1")))
@@ -1079,13 +1159,15 @@ index 15de4c5cc5b..6a85dfb6883 100644
 
    setupTestData()
 diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/RemoveRedundantProjectsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/RemoveRedundantProjectsSuite.scala
-index b5bac8079c4..bf54af24a30 100644
+index b5bac8079c4..544c1ddc697 100644
 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/RemoveRedundantProjectsSuite.scala
 +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/RemoveRedundantProjectsSuite.scala
-@@ -18,6 +18,7 @@
+@@ -17,7 +17,8 @@
+
  package org.apache.spark.sql.execution
 
- import org.apache.spark.sql.{DataFrame, QueryTest, Row}
+-import org.apache.spark.sql.{DataFrame, QueryTest, Row}
++import org.apache.spark.sql.{DataFrame, IgnoreComet, QueryTest, Row}
 +import org.apache.spark.sql.comet.CometProjectExec
  import org.apache.spark.sql.connector.SimpleWritableDataSource
  import org.apache.spark.sql.execution.adaptive.{AdaptiveSparkPlanHelper, DisableAdaptiveExecutionSuite, EnableAdaptiveExecutionSuite}
@@ -1102,6 +1184,16 @@ index b5bac8079c4..bf54af24a30 100644
        assert(actual == expected)
      }
    }
+@@ -112,7 +116,8 @@ abstract class RemoveRedundantProjectsSuiteBase
+     assertProjectExec(query, 1, 3)
+   }
+
+-  test("join with ordering requirement") {
++  test("join with ordering requirement",
++    IgnoreComet("TODO: https://github.com/apache/datafusion-comet/issues/551")) {
+     val query = "select * from (select key, a, c, b from testView) as t1 join " +
+       "(select key, a, b, c from testView) as t2 on t1.key = t2.key where t2.a > 50"
+     assertProjectExec(query, 2, 2)
 diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/RemoveRedundantSortsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/RemoveRedundantSortsSuite.scala
 index 005e764cc30..92ec088efab 100644
 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/RemoveRedundantSortsSuite.scala