apache · viirya · May 3, 2024 · Apr 24, 2024 · Apr 24, 2024 · Apr 25, 2024
diff --git a/.github/workflows/spark_sql_test_ansi.yml b/.github/workflows/spark_sql_test_ansi.yml
@@ -0,0 +1,81 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+name: Spark SQL Tests (ANSI mode)
+
+concurrency:  # one group per repository, PR head ref (or commit SHA when there is none) and workflow
+  group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }}
+  cancel-in-progress: true  # a newer run in the same group cancels the one still in progress
+
+on:
+  # Enable the following triggers once ANSI support is complete:
+  #  push:
+  #    paths-ignore:
+  #      - "doc/**"
+  #      - "**.md"
+  #  pull_request:
+  #    paths-ignore:
+  #      - "doc/**"
+  #      - "**.md"
+
+  # manual trigger ONLY
+  # https://docs.github.com/en/actions/managing-workflow-runs/manually-running-a-workflow
+  workflow_dispatch:
+
+env:
+  RUST_VERSION: nightly  # handed to the setup-builder action as its rust-version input
+
+jobs:
+  spark-sql-catalyst:  # one matrix entry per Spark SQL module slice, run with Comet and ANSI mode enabled
+    strategy:
+      matrix:
+        os: [ubuntu-latest]
+        java-version: [11]
+        spark-version: [{short: '3.4', full: '3.4.2'}]
+        module:  # sbt arguments per slice; after "--", ScalaTest -n includes a tag and -l excludes it
+          - {name: "catalyst", args1: "catalyst/test", args2: ""}
+          - {name: "sql/core-1", args1: "", args2: "sql/testOnly * -- -l org.apache.spark.tags.ExtendedSQLTest -l org.apache.spark.tags.SlowSQLTest"}
+          - {name: "sql/core-2", args1: "", args2: "sql/testOnly * -- -n org.apache.spark.tags.ExtendedSQLTest"}
+          - {name: "sql/core-3", args1: "", args2: "sql/testOnly * -- -n org.apache.spark.tags.SlowSQLTest"}
+          - {name: "sql/hive-1", args1: "", args2: "hive/testOnly * -- -l org.apache.spark.tags.ExtendedHiveTest -l org.apache.spark.tags.SlowHiveTest"}
+          - {name: "sql/hive-2", args1: "", args2: "hive/testOnly * -- -n org.apache.spark.tags.ExtendedHiveTest"}
+          - {name: "sql/hive-3", args1: "", args2: "hive/testOnly * -- -n org.apache.spark.tags.SlowHiveTest"}
+      fail-fast: false  # keep the remaining matrix entries running when one of them fails
+    name: spark-sql-${{ matrix.module.name }}/${{ matrix.os }}/spark-${{ matrix.spark-version.full }}/java-${{ matrix.java-version }}
+    runs-on: ${{ matrix.os }}
+    container:
+      image: amd64/rust
+    steps:
+      - uses: actions/checkout@v4
+      - name: Setup Rust & Java toolchain
+        uses: ./.github/actions/setup-builder
+        with:
+          rust-version: ${{env.RUST_VERSION}}
+          jdk-version: ${{ matrix.java-version }}
+      - name: Setup Spark
+        uses: ./.github/actions/setup-spark-builder
+        with:
+          spark-version: ${{ matrix.spark-version.full }}
+          spark-short-version: ${{ matrix.spark-version.short }}
+          comet-version: '0.1.0-SNAPSHOT' # TODO: get this from pom.xml
+      - name: Run Spark tests  # args1 is expanded unquoted, args2 as one quoted sbt argument
+        run: |
+          cd apache-spark
+          ENABLE_COMET=true ENABLE_COMET_ANSI_MODE=true build/sbt ${{ matrix.module.args1 }} "${{ matrix.module.args2 }}"
+        env:
+          LC_ALL: "C.UTF-8"
+
diff --git a/dev/diffs/3.4.2.diff b/dev/diffs/3.4.2.diff
@@ -1327,7 +1327,7 @@ index abe606ad9c1..2d930b64cca 100644
      val tblTargetName = "tbl_target"
      val tblSourceQualified = s"default.$tblSourceName"
 diff --git a/sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestUtils.scala b/sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestUtils.scala
-index dd55fcfe42c..cc18147d17a 100644
+index dd55fcfe42c..b4776c50e49 100644
 --- a/sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestUtils.scala
 +++ b/sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestUtils.scala
 @@ -41,6 +41,7 @@ import org.apache.spark.sql.catalyst.plans.PlanTest
@@ -1351,7 +1351,7 @@ index dd55fcfe42c..cc18147d17a 100644
      }
    }
 
-@@ -242,6 +247,23 @@ private[sql] trait SQLTestUtilsBase
+@@ -242,6 +247,32 @@ private[sql] trait SQLTestUtilsBase
      protected override def _sqlContext: SQLContext = self.spark.sqlContext
    }
 
@@ -1371,11 +1371,20 @@ index dd55fcfe42c..cc18147d17a 100644
 +    val v = System.getenv("ENABLE_COMET_SCAN_ONLY")
 +    v != null && v.toBoolean
 +  }
++
++  /**
++   * Whether to enable ANSI mode. This is only effective when
++   * [[isCometEnabled]] returns true.
++   */
++  protected def enableCometAnsiMode: Boolean = {
++    val v = System.getenv("ENABLE_COMET_ANSI_MODE")
++    v != null && v.toBoolean
++  }
 +
    protected override def withSQLConf(pairs: (String, String)*)(f: => Unit): Unit = {
      SparkSession.setActiveSession(spark)
      super.withSQLConf(pairs: _*)(f)
-@@ -434,6 +456,8 @@ private[sql] trait SQLTestUtilsBase
+@@ -434,6 +465,8 @@ private[sql] trait SQLTestUtilsBase
      val schema = df.schema
      val withoutFilters = df.queryExecution.executedPlan.transform {
        case FilterExec(_, child) => child
@@ -1385,10 +1394,10 @@ index dd55fcfe42c..cc18147d17a 100644
 
      spark.internalCreateDataFrame(withoutFilters.execute(), schema)
 diff --git a/sql/core/src/test/scala/org/apache/spark/sql/test/SharedSparkSession.scala b/sql/core/src/test/scala/org/apache/spark/sql/test/SharedSparkSession.scala
-index ed2e309fa07..4cfe0093da7 100644
+index ed2e309fa07..f64cc283903 100644
 --- a/sql/core/src/test/scala/org/apache/spark/sql/test/SharedSparkSession.scala
 +++ b/sql/core/src/test/scala/org/apache/spark/sql/test/SharedSparkSession.scala
-@@ -74,6 +74,21 @@ trait SharedSparkSessionBase
+@@ -74,6 +74,28 @@ trait SharedSparkSessionBase
        // this rule may potentially block testing of other optimization rules such as
        // ConstantPropagation etc.
        .set(SQLConf.OPTIMIZER_EXCLUDED_RULES.key, ConvertToLocalRelation.ruleName)
@@ -1406,6 +1415,13 @@ index ed2e309fa07..4cfe0093da7 100644
 +            "org.apache.spark.sql.comet.execution.shuffle.CometShuffleManager")
 +          .set("spark.comet.exec.shuffle.enabled", "true")
 +      }
++
++      if (enableCometAnsiMode) {
++        conf
++          .set("spark.sql.ansi.enabled", "true")
++          .set("spark.comet.ansi.enabled", "true")
++      }
++
 +    }
      conf.set(
        StaticSQLConf.WAREHOUSE_PATH,
@@ -1447,10 +1463,10 @@ index 1966e1e64fd..cde97a0aafe 100644
        spark.sql(
          """
 diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/test/TestHive.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/test/TestHive.scala
-index 07361cfdce9..c5d94c92e32 100644
+index 07361cfdce9..1763168a808 100644
 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/test/TestHive.scala
 +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/test/TestHive.scala
-@@ -55,25 +55,46 @@ object TestHive
+@@ -55,25 +55,54 @@ object TestHive
      new SparkContext(
        System.getProperty("spark.sql.test.master", "local[1]"),
        "TestSQLContext",
@@ -1507,8 +1523,16 @@ index 07361cfdce9..c5d94c92e32 100644
 +                "org.apache.spark.sql.comet.execution.shuffle.CometShuffleManager")
 +              .set("spark.comet.exec.shuffle.enabled", "true")
 +          }
-+        }
++
++          val a = System.getenv("ENABLE_COMET_ANSI_MODE")
++          if (a != null && a.toBoolean) {
++            conf
++              .set("spark.sql.ansi.enabled", "true")
++              .set("spark.comet.ansi.enabled", "true")
++          }
 
++        }
++
 +        conf
 +      }
 +    ))

diff --git a/pom.xml b/pom.xml
@@ -885,7 +885,7 @@ under the License.
             <exclude>rust-toolchain</exclude>
             <exclude>Makefile</exclude>
             <exclude>dev/Dockerfile*</exclude>
-            <exclude>dev/diff/**</exclude>
+            <exclude>dev/diffs/**</exclude>
             <exclude>dev/deploy-file</exclude>
             <exclude>**/test/resources/**</exclude>
             <exclude>**/benchmarks/*.txt</exclude>