
Commit

add links to tracking issue
andygrove committed Jul 1, 2024
1 parent 345592a commit 6e93f4b
Showing 2 changed files with 28 additions and 17 deletions.
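This commit updates the patched `dev/diffs/3.5.1.diff`, which wraps a number of Spark SQL tests in an `IgnoreComet` tag, by adding a link to the tracking issue (https://github.com/apache/datafusion-comet/issues/617) above each tag. As a rough illustration of the pattern, here is a minimal ScalaTest sketch; the real `IgnoreComet` is defined in the Comet-patched Spark test sources, and its exact definition, package, and tag name may differ. The suite name and test body below are purely hypothetical.

```scala
import org.scalatest.Tag
import org.scalatest.funsuite.AnyFunSuite

// Assumption: IgnoreComet is an ordinary ScalaTest Tag carrying a human-readable
// reason, so the build can exclude these tests when Comet is enabled. The actual
// class in the Comet-patched Spark test sources may be defined differently.
case class IgnoreComet(reason: String) extends Tag("org.apache.spark.sql.IgnoreComet")

// Hypothetical suite showing the pattern used throughout this diff: the tag is
// passed as an extra argument to test(), with a comment linking the tracking issue.
class ExampleSuite extends AnyFunSuite {
  // https://github.com/apache/datafusion-comet/issues/617
  test("metadata struct (parquet): example case",
    IgnoreComet("TODO: fix Comet for this test")) {
    assert(1 + 1 == 2)
  }
}
```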
43 changes: 27 additions & 16 deletions dev/diffs/3.5.1.diff
@@ -1533,7 +1533,7 @@ index 68bae34790a..ea906fd1adc 100644
assert(shuffles2.size == 4)
val smj2 = findTopLevelSortMergeJoin(adaptive2)
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileMetadataStructSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileMetadataStructSuite.scala
index 15055a276fa..0f3748b965e 100644
index 15055a276fa..6e60b94dc3d 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileMetadataStructSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileMetadataStructSuite.scala
@@ -23,7 +23,7 @@ import java.text.SimpleDateFormat
@@ -1545,82 +1545,90 @@ index 15055a276fa..0f3748b965e 100644
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
import org.apache.spark.sql.catalyst.trees.TreeNodeTag
import org.apache.spark.sql.execution.FileSourceScanExec
@@ -116,7 +116,8 @@ class FileMetadataStructSuite extends QueryTest with SharedSparkSession {
@@ -116,7 +116,9 @@ class FileMetadataStructSuite extends QueryTest with SharedSparkSession {
testName: String, fileSchema: StructType)
(f: (DataFrame, Map[String, Any], Map[String, Any]) => Unit): Unit = {
Seq("json", "parquet").foreach { testFileFormat =>
- test(s"metadata struct ($testFileFormat): " + testName) {
+ test(s"metadata struct ($testFileFormat): " + testName,
+ // https://github.com/apache/datafusion-comet/issues/617
+ IgnoreComet("TODO: fix Comet for this test")) {
withTempDir { dir =>
import scala.collection.JavaConverters._

@@ -767,7 +768,8 @@ class FileMetadataStructSuite extends QueryTest with SharedSparkSession {
@@ -767,7 +769,9 @@ class FileMetadataStructSuite extends QueryTest with SharedSparkSession {

Seq(true, false).foreach { useVectorizedReader =>
val label = if (useVectorizedReader) "reading batches" else "reading rows"
- test(s"SPARK-39806: metadata for a partitioned table ($label)") {
+ test(s"SPARK-39806: metadata for a partitioned table ($label)",
+ // https://github.com/apache/datafusion-comet/issues/617
+ IgnoreComet("TODO: fix Comet for this test")) {
withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> useVectorizedReader.toString) {
withTempPath { dir =>
// Store dynamically partitioned data.
@@ -789,7 +791,8 @@ class FileMetadataStructSuite extends QueryTest with SharedSparkSession {
@@ -789,7 +793,9 @@ class FileMetadataStructSuite extends QueryTest with SharedSparkSession {
}

Seq("parquet", "orc").foreach { format =>
- test(s"SPARK-40918: Output cols around WSCG.isTooManyFields limit in $format") {
+ test(s"SPARK-40918: Output cols around WSCG.isTooManyFields limit in $format",
+ // https://github.com/apache/datafusion-comet/issues/617
+ IgnoreComet("TODO: fix Comet for this test")) {
// The issue was that ParquetFileFormat would not count the _metadata columns towards
// the WholeStageCodegenExec.isTooManyFields limit, while FileSourceScanExec would,
// resulting in Parquet reader returning columnar output, while scan expected row.
@@ -862,7 +865,8 @@ class FileMetadataStructSuite extends QueryTest with SharedSparkSession {
@@ -862,7 +868,9 @@ class FileMetadataStructSuite extends QueryTest with SharedSparkSession {
}
}

- test("SPARK-41896: Filter on constant and generated metadata attributes at the same time") {
+ test("SPARK-41896: Filter on constant and generated metadata attributes at the same time",
+ // https://github.com/apache/datafusion-comet/issues/617
+ IgnoreComet("TODO: fix Comet for this test")) {
withTempPath { dir =>
val idColumnName = "id"
val partitionColumnName = "partition"
@@ -897,7 +901,8 @@ class FileMetadataStructSuite extends QueryTest with SharedSparkSession {
@@ -897,7 +905,9 @@ class FileMetadataStructSuite extends QueryTest with SharedSparkSession {
}
}

- test("SPARK-41896: Filter by a function that takes the metadata struct as argument") {
+ test("SPARK-41896: Filter by a function that takes the metadata struct as argument",
+ // https://github.com/apache/datafusion-comet/issues/617
+ IgnoreComet("TODO: fix Comet for this test")) {
withTempPath { dir =>
val idColumnName = "id"
val numFiles = 4
@@ -984,7 +989,8 @@ class FileMetadataStructSuite extends QueryTest with SharedSparkSession {
@@ -984,7 +994,9 @@ class FileMetadataStructSuite extends QueryTest with SharedSparkSession {


Seq("parquet", "json", "csv", "text", "orc").foreach { format =>
- test(s"metadata file path is url encoded for format: $format") {
+ test(s"metadata file path is url encoded for format: $format",
+ // https://github.com/apache/datafusion-comet/issues/617
+ IgnoreComet("TODO: fix Comet for this test")) {
withTempPath { f =>
val dirWithSpace = s"$f/with space"
spark.range(10)
@@ -1002,7 +1008,8 @@ class FileMetadataStructSuite extends QueryTest with SharedSparkSession {
@@ -1002,7 +1014,9 @@ class FileMetadataStructSuite extends QueryTest with SharedSparkSession {
}
}

- test(s"metadata file name is url encoded for format: $format") {
+ test(s"metadata file name is url encoded for format: $format",
+ // https://github.com/apache/datafusion-comet/issues/617
+ IgnoreComet("TODO: fix Comet for this test")) {
val suffix = if (format == "text") ".txt" else s".$format"
withTempPath { f =>
val dirWithSpace = s"$f/with space"
@@ -1056,7 +1063,8 @@ class FileMetadataStructSuite extends QueryTest with SharedSparkSession {
@@ -1056,7 +1070,9 @@ class FileMetadataStructSuite extends QueryTest with SharedSparkSession {
}
}

- test("SPARK-43450: Filter on full _metadata column struct") {
+ test("SPARK-43450: Filter on full _metadata column struct",
+ // https://github.com/apache/datafusion-comet/issues/617
+ IgnoreComet("TODO: fix Comet for this test")) {
withTempPath { dir =>
val numRows = 10
@@ -1749,7 +1757,7 @@ index 07e2849ce6f..3e73645b638 100644
ParquetOutputFormat.WRITER_VERSION -> ParquetProperties.WriterVersion.PARQUET_2_0.toString
)
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileMetadataStructRowIndexSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileMetadataStructRowIndexSuite.scala
index c10e1799702..2f78f6c44e4 100644
index c10e1799702..f18ca092dba 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileMetadataStructRowIndexSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileMetadataStructRowIndexSuite.scala
@@ -16,7 +16,7 @@
@@ -1761,11 +1769,12 @@ index c10e1799702..2f78f6c44e4 100644
import org.apache.spark.sql.execution.datasources.FileFormat
import org.apache.spark.sql.functions.{col, lit}
import org.apache.spark.sql.internal.SQLConf
@@ -219,7 +219,8 @@ class ParquetFileMetadataStructRowIndexSuite extends QueryTest with SharedSparkS
@@ -219,7 +219,9 @@ class ParquetFileMetadataStructRowIndexSuite extends QueryTest with SharedSparkS
}
}

- test(s"read user created ${FileFormat.METADATA_NAME}.${ROW_INDEX} column") {
+ // https://github.com/apache/datafusion-comet/issues/617
+ test(s"read user created ${FileFormat.METADATA_NAME}.${ROW_INDEX} column",
+ IgnoreComet("TODO: fix Comet for this test")) {
withReadDataFrame("parquet", partitionCol = "pb") { df =>
@@ -2119,7 +2128,7 @@ index d083cac48ff..3c11bcde807 100644
import testImplicits._

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/BucketedReadSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/BucketedReadSuite.scala
index 746f289c393..8da2335f7a3 100644
index 746f289c393..bc01ffd52ea 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/sources/BucketedReadSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/BucketedReadSuite.scala
@@ -25,10 +25,11 @@ import org.apache.spark.sql.catalyst.expressions
@@ -2236,17 +2245,18 @@ index 746f289c393..8da2335f7a3 100644
checkAnswer(aggDF, df1.groupBy("j").agg(max("k")))
}
}
@@ -1013,7 +1039,8 @@ abstract class BucketedReadSuite extends QueryTest with SQLTestUtils with Adapti
@@ -1013,7 +1039,9 @@ abstract class BucketedReadSuite extends QueryTest with SQLTestUtils with Adapti
}
}

- test("bucket coalescing is applied when join expressions match with partitioning expressions") {
+ // https://github.com/apache/datafusion-comet/issues/617
+ test("bucket coalescing is applied when join expressions match with partitioning expressions",
+ IgnoreComet("TODO: fix Comet for this test")) {
withTable("t1", "t2", "t3") {
df1.write.format("parquet").bucketBy(8, "i", "j").saveAsTable("t1")
df2.write.format("parquet").bucketBy(4, "i", "j").saveAsTable("t2")
@@ -1029,15 +1056,23 @@ abstract class BucketedReadSuite extends QueryTest with SQLTestUtils with Adapti
@@ -1029,15 +1057,23 @@ abstract class BucketedReadSuite extends QueryTest with SQLTestUtils with Adapti
Seq(true, false).foreach { aqeEnabled =>
withSQLConf(SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> aqeEnabled.toString) {
val plan = sql(query).queryExecution.executedPlan
@@ -2495,7 +2505,7 @@ index b4c4ec7acbf..20579284856 100644

val aggregateExecsWithoutPartialAgg = allAggregateExecs.filter {
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingJoinSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingJoinSuite.scala
index 3e1bc57dfa2..0d151977165 100644
index 3e1bc57dfa2..662640af934 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingJoinSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingJoinSuite.scala
@@ -28,10 +28,7 @@ import org.apache.commons.io.FileUtils
@@ -2510,7 +2520,7 @@ index 3e1bc57dfa2..0d151977165 100644
import org.apache.spark.sql.execution.streaming.{MemoryStream, StatefulOperatorStateInfo, StreamingSymmetricHashJoinExec, StreamingSymmetricHashJoinHelper}
import org.apache.spark.sql.execution.streaming.state.{RocksDBStateStoreProvider, StateStore, StateStoreProviderId}
import org.apache.spark.sql.functions._
@@ -594,40 +591,9 @@ class StreamingInnerJoinSuite extends StreamingJoinSuite {
@@ -594,40 +591,10 @@ class StreamingInnerJoinSuite extends StreamingJoinSuite {
CheckNewAnswer((5, 10, 5, 15, 5, 25)))
}

@@ -2548,6 +2558,7 @@ index 3e1bc57dfa2..0d151977165 100644
- && opA.numPartitions == numPartitions && opB.numPartitions == numPartitions => j
- }.size == 1)
- })
+ // https://github.com/apache/datafusion-comet/issues/617
+ test("streaming join should require StatefulOpClusteredDistribution from children",
+ IgnoreComet("TODO: fix Comet for this test")) {
+ fail("TODO fix diff")
2 changes: 1 addition & 1 deletion docs/source/contributor-guide/spark-sql-tests.md
@@ -118,7 +118,7 @@ wiggle --replace ./sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.sc
## Generating The Diff File

```shell
git diff v3.5.1 > ../datafusion-comet/dev/diffs/3.5.1.diff
git diff v3.5.1 > ../datafusion-comet/dev/diffs/3.5.1.diff
```

## Running Tests in CI
