
Commit

add links to tracking issue
andygrove committed Jul 1, 2024
1 parent 345592a commit 6e93f4b
Showing 2 changed files with 28 additions and 17 deletions.
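This commit updates the patched `dev/diffs/3.5.1.diff`, which wraps a number of Spark SQL tests in an `IgnoreComet` tag, by adding a link to the tracking issue (https://github.com/apache/datafusion-comet/issues/617) above each tag. As a rough illustration of the pattern, here is a minimal ScalaTest sketch; the real `IgnoreComet` is defined in the Comet-patched Spark test sources, and its exact definition, package, and tag name may differ. The suite name and test body below are purely hypothetical.

```scala
import org.scalatest.Tag
import org.scalatest.funsuite.AnyFunSuite

// Assumption: IgnoreComet is an ordinary ScalaTest Tag carrying a human-readable
// reason, so the build can exclude these tests when Comet is enabled. The actual
// class in the Comet-patched Spark test sources may be defined differently.
case class IgnoreComet(reason: String) extends Tag("org.apache.spark.sql.IgnoreComet")

// Hypothetical suite showing the pattern used throughout this diff: the tag is
// passed as an extra argument to test(), with a comment linking the tracking issue.
class ExampleSuite extends AnyFunSuite {
  // https://github.com/apache/datafusion-comet/issues/617
  test("metadata struct (parquet): example case",
    IgnoreComet("TODO: fix Comet for this test")) {
    assert(1 + 1 == 2)
  }
}
```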
43 changes: 27 additions & 16 deletions dev/diffs/3.5.1.diff
@@ -1533,7 +1533,7 @@ index 68bae34790a..ea906fd1adc 100644
assert(shuffles2.size == 4)
val smj2 = findTopLevelSortMergeJoin(adaptive2)
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileMetadataStructSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileMetadataStructSuite.scala
index 15055a276fa..0f3748b965e 100644
index 15055a276fa..6e60b94dc3d 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileMetadataStructSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileMetadataStructSuite.scala
@@ -23,7 +23,7 @@ import java.text.SimpleDateFormat
@@ -1545,82 +1545,90 @@ index 15055a276fa..0f3748b965e 100644
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
import org.apache.spark.sql.catalyst.trees.TreeNodeTag
import org.apache.spark.sql.execution.FileSourceScanExec
@@ -116,7 +116,8 @@ class FileMetadataStructSuite extends QueryTest with SharedSparkSession {
@@ -116,7 +116,9 @@ class FileMetadataStructSuite extends QueryTest with SharedSparkSession {
testName: String, fileSchema: StructType)
(f: (DataFrame, Map[String, Any], Map[String, Any]) => Unit): Unit = {
Seq("json", "parquet").foreach { testFileFormat =>
- test(s"metadata struct ($testFileFormat): " + testName) {
+ test(s"metadata struct ($testFileFormat): " + testName,
+ // https://github.com/apache/datafusion-comet/issues/617
+ IgnoreComet("TODO: fix Comet for this test")) {
withTempDir { dir =>
import scala.collection.JavaConverters._

@@ -767,7 +768,8 @@ class FileMetadataStructSuite extends QueryTest with SharedSparkSession {
@@ -767,7 +769,9 @@ class FileMetadataStructSuite extends QueryTest with SharedSparkSession {

Seq(true, false).foreach { useVectorizedReader =>
val label = if (useVectorizedReader) "reading batches" else "reading rows"
- test(s"SPARK-39806: metadata for a partitioned table ($label)") {
+ test(s"SPARK-39806: metadata for a partitioned table ($label)",
+ // https://github.com/apache/datafusion-comet/issues/617
+ IgnoreComet("TODO: fix Comet for this test")) {
withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> useVectorizedReader.toString) {
withTempPath { dir =>
// Store dynamically partitioned data.
@@ -789,7 +791,8 @@ class FileMetadataStructSuite extends QueryTest with SharedSparkSession {
@@ -789,7 +793,9 @@ class FileMetadataStructSuite extends QueryTest with SharedSparkSession {
}

Seq("parquet", "orc").foreach { format =>
- test(s"SPARK-40918: Output cols around WSCG.isTooManyFields limit in $format") {
+ test(s"SPARK-40918: Output cols around WSCG.isTooManyFields limit in $format",
+ // https://github.com/apache/datafusion-comet/issues/617
+ IgnoreComet("TODO: fix Comet for this test")) {
// The issue was that ParquetFileFormat would not count the _metadata columns towards
// the WholeStageCodegenExec.isTooManyFields limit, while FileSourceScanExec would,
// resulting in Parquet reader returning columnar output, while scan expected row.
@@ -862,7 +865,8 @@ class FileMetadataStructSuite extends QueryTest with SharedSparkSession {
@@ -862,7 +868,9 @@ class FileMetadataStructSuite extends QueryTest with SharedSparkSession {
}
}

- test("SPARK-41896: Filter on constant and generated metadata attributes at the same time") {
+ test("SPARK-41896: Filter on constant and generated metadata attributes at the same time",
+ // https://github.com/apache/datafusion-comet/issues/617
+ IgnoreComet("TODO: fix Comet for this test")) {
withTempPath { dir =>
val idColumnName = "id"
val partitionColumnName = "partition"
@@ -897,7 +901,8 @@ class FileMetadataStructSuite extends QueryTest with SharedSparkSession {
@@ -897,7 +905,9 @@ class FileMetadataStructSuite extends QueryTest with SharedSparkSession {
}
}

- test("SPARK-41896: Filter by a function that takes the metadata struct as argument") {
+ test("SPARK-41896: Filter by a function that takes the metadata struct as argument",
+ // https://github.com/apache/datafusion-comet/issues/617
+ IgnoreComet("TODO: fix Comet for this test")) {
withTempPath { dir =>
val idColumnName = "id"
val numFiles = 4
@@ -984,7 +989,8 @@ class FileMetadataStructSuite extends QueryTest with SharedSparkSession {
@@ -984,7 +994,9 @@ class FileMetadataStructSuite extends QueryTest with SharedSparkSession {


Seq("parquet", "json", "csv", "text", "orc").foreach { format =>
- test(s"metadata file path is url encoded for format: $format") {
+ test(s"metadata file path is url encoded for format: $format",
+ // https://github.com/apache/datafusion-comet/issues/617
+ IgnoreComet("TODO: fix Comet for this test")) {
withTempPath { f =>
val dirWithSpace = s"$f/with space"
spark.range(10)
@@ -1002,7 +1008,8 @@ class FileMetadataStructSuite extends QueryTest with SharedSparkSession {
@@ -1002,7 +1014,9 @@ class FileMetadataStructSuite extends QueryTest with SharedSparkSession {
}
}

- test(s"metadata file name is url encoded for format: $format") {
+ test(s"metadata file name is url encoded for format: $format",
+ // https://github.com/apache/datafusion-comet/issues/617
+ IgnoreComet("TODO: fix Comet for this test")) {
val suffix = if (format == "text") ".txt" else s".$format"
withTempPath { f =>
val dirWithSpace = s"$f/with space"
@@ -1056,7 +1063,8 @@ class FileMetadataStructSuite extends QueryTest with SharedSparkSession {
@@ -1056,7 +1070,9 @@ class FileMetadataStructSuite extends QueryTest with SharedSparkSession {
}
}

- test("SPARK-43450: Filter on full _metadata column struct") {
+ test("SPARK-43450: Filter on full _metadata column struct",
+ // https://github.com/apache/datafusion-comet/issues/617
+ IgnoreComet("TODO: fix Comet for this test")) {
withTempPath { dir =>
val numRows = 10
@@ -1749,7 +1757,7 @@ index 07e2849ce6f..3e73645b638 100644
ParquetOutputFormat.WRITER_VERSION -> ParquetProperties.WriterVersion.PARQUET_2_0.toString
)
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileMetadataStructRowIndexSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileMetadataStructRowIndexSuite.scala
index c10e1799702..2f78f6c44e4 100644
index c10e1799702..f18ca092dba 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileMetadataStructRowIndexSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileMetadataStructRowIndexSuite.scala
@@ -16,7 +16,7 @@
@@ -1761,11 +1769,12 @@ index c10e1799702..2f78f6c44e4 100644
import org.apache.spark.sql.execution.datasources.FileFormat
import org.apache.spark.sql.functions.{col, lit}
import org.apache.spark.sql.internal.SQLConf
@@ -219,7 +219,8 @@ class ParquetFileMetadataStructRowIndexSuite extends QueryTest with SharedSparkS
@@ -219,7 +219,9 @@ class ParquetFileMetadataStructRowIndexSuite extends QueryTest with SharedSparkS
}
}

- test(s"read user created ${FileFormat.METADATA_NAME}.${ROW_INDEX} column") {
+ // https://github.com/apache/datafusion-comet/issues/617
+ test(s"read user created ${FileFormat.METADATA_NAME}.${ROW_INDEX} column",
+ IgnoreComet("TODO: fix Comet for this test")) {
withReadDataFrame("parquet", partitionCol = "pb") { df =>
@@ -2119,7 +2128,7 @@ index d083cac48ff..3c11bcde807 100644
import testImplicits._

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/BucketedReadSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/BucketedReadSuite.scala
index 746f289c393..8da2335f7a3 100644
index 746f289c393..bc01ffd52ea 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/sources/BucketedReadSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/BucketedReadSuite.scala
@@ -25,10 +25,11 @@ import org.apache.spark.sql.catalyst.expressions
@@ -2236,17 +2245,18 @@ index 746f289c393..8da2335f7a3 100644
checkAnswer(aggDF, df1.groupBy("j").agg(max("k")))
}
}
@@ -1013,7 +1039,8 @@ abstract class BucketedReadSuite extends QueryTest with SQLTestUtils with Adapti
@@ -1013,7 +1039,9 @@ abstract class BucketedReadSuite extends QueryTest with SQLTestUtils with Adapti
}
}

- test("bucket coalescing is applied when join expressions match with partitioning expressions") {
+ // https://github.com/apache/datafusion-comet/issues/617
+ test("bucket coalescing is applied when join expressions match with partitioning expressions",
+ IgnoreComet("TODO: fix Comet for this test")) {
withTable("t1", "t2", "t3") {
df1.write.format("parquet").bucketBy(8, "i", "j").saveAsTable("t1")
df2.write.format("parquet").bucketBy(4, "i", "j").saveAsTable("t2")
@@ -1029,15 +1056,23 @@ abstract class BucketedReadSuite extends QueryTest with SQLTestUtils with Adapti
@@ -1029,15 +1057,23 @@ abstract class BucketedReadSuite extends QueryTest with SQLTestUtils with Adapti
Seq(true, false).foreach { aqeEnabled =>
withSQLConf(SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> aqeEnabled.toString) {
val plan = sql(query).queryExecution.executedPlan
@@ -2495,7 +2505,7 @@ index b4c4ec7acbf..20579284856 100644

val aggregateExecsWithoutPartialAgg = allAggregateExecs.filter {
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingJoinSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingJoinSuite.scala
index 3e1bc57dfa2..0d151977165 100644
index 3e1bc57dfa2..662640af934 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingJoinSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingJoinSuite.scala
@@ -28,10 +28,7 @@ import org.apache.commons.io.FileUtils
@@ -2510,7 +2520,7 @@ index 3e1bc57dfa2..0d151977165 100644
import org.apache.spark.sql.execution.streaming.{MemoryStream, StatefulOperatorStateInfo, StreamingSymmetricHashJoinExec, StreamingSymmetricHashJoinHelper}
import org.apache.spark.sql.execution.streaming.state.{RocksDBStateStoreProvider, StateStore, StateStoreProviderId}
import org.apache.spark.sql.functions._
@@ -594,40 +591,9 @@ class StreamingInnerJoinSuite extends StreamingJoinSuite {
@@ -594,40 +591,10 @@ class StreamingInnerJoinSuite extends StreamingJoinSuite {
CheckNewAnswer((5, 10, 5, 15, 5, 25)))
}

@@ -2548,6 +2558,7 @@ index 3e1bc57dfa2..0d151977165 100644
- && opA.numPartitions == numPartitions && opB.numPartitions == numPartitions => j
- }.size == 1)
- })
+ // https://github.com/apache/datafusion-comet/issues/617
+ test("streaming join should require StatefulOpClusteredDistribution from children",
+ IgnoreComet("TODO: fix Comet for this test")) {
+ fail("TODO fix diff")
2 changes: 1 addition & 1 deletion docs/source/contributor-guide/spark-sql-tests.md
@@ -118,7 +118,7 @@ wiggle --replace ./sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.sc
## Generating The Diff File

```shell
git diff v3.5.1 > ../datafusion-comet/dev/diffs/3.5.1.diff
git diff v3.5.1 > ../datafusion-comet/dev/diffs/3.5.1.diff
```

## Running Tests in CI
