build: Enable comet tests with spark-4.0 profile (apache#493)
## Rationale for this change

To be ready for Spark 4.0

## What changes are included in this PR?

This PR enables the Comet tests with the spark-4.0 profile.

## How are these changes tested?

Tests with the spark-4.0 profile should now pass (the Spark tests themselves do not pass yet).
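
For reference, the equivalent local invocation would be something like `./mvnw -Pspark-4.0 test` (illustrative; see the contributor-guide commands below for profile-specific suite runs).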
  • Loading branch information
kazuyukitanimura committed Jul 1, 2024
1 parent 9108f2a commit 48ddcdd
Showing 286 changed files with 59,332 additions and 36 deletions.
18 changes: 6 additions & 12 deletions .github/workflows/pr_build.yml
@@ -109,10 +109,8 @@ jobs:
       - name: Java test steps
         uses: ./.github/actions/java-test
         with:
-          # TODO: remove -DskipTests after fixing tests
-          maven_opts: "-Pspark-${{ matrix.spark-version }} -DskipTests"
-          # TODO: upload test reports after enabling tests
-          upload-test-reports: false
+          maven_opts: -Pspark-${{ matrix.spark-version }}
+          upload-test-reports: true
 
   linux-test-with-old-spark:
     strategy:
@@ -237,10 +235,8 @@ jobs:
       - name: Java test steps
         uses: ./.github/actions/java-test
         with:
-          # TODO: remove -DskipTests after fixing tests
-          maven_opts: "-Pspark-${{ matrix.spark-version }} -DskipTests"
-          # TODO: upload test reports after enabling tests
-          upload-test-reports: false
+          maven_opts: -Pspark-${{ matrix.spark-version }}
+          upload-test-reports: true
 
   macos-aarch64-test-with-spark4_0:
     strategy:
@@ -277,10 +273,8 @@ jobs:
       - name: Java test steps
         uses: ./.github/actions/java-test
         with:
-          # TODO: remove -DskipTests after fixing tests
-          maven_opts: "-Pspark-${{ matrix.spark-version }} -DskipTests"
-          # TODO: upload test reports after enabling tests
-          upload-test-reports: false
+          maven_opts: -Pspark-${{ matrix.spark-version }}
+          upload-test-reports: true
 
   macos-aarch64-test-with-old-spark:
     strategy:
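
The same two-line change is applied to each of the three affected jobs above: the temporary `-DskipTests` workaround is dropped so the Comet test suites actually run under the selected Spark profile, and test-report upload is switched back on.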
9 changes: 6 additions & 3 deletions common/src/main/scala/org/apache/comet/CometConf.scala
@@ -29,6 +29,8 @@ import org.apache.spark.network.util.JavaUtils
 import org.apache.spark.sql.comet.util.Utils
 import org.apache.spark.sql.internal.SQLConf
 
+import org.apache.comet.shims.ShimCometConf
+
 /**
  * Configurations for a Comet application. Mostly inspired by [[SQLConf]] in Spark.
  *
@@ -41,7 +43,7 @@ import org.apache.spark.sql.internal.SQLConf
  * which retrieves the config value from the thread-local [[SQLConf]] object. Alternatively, you
  * can also explicitly pass a [[SQLConf]] object to the `get` method.
  */
-object CometConf {
+object CometConf extends ShimCometConf {
 
   /** List of all configs that is used for generating documentation */
   val allConfs = new ListBuffer[ConfigEntry[_]]
@@ -361,7 +363,7 @@ object CometConf {
         "column to a long column, a float column to a double column, etc. This is automatically" +
           "enabled when reading from Iceberg tables.")
       .booleanConf
-      .createWithDefault(false)
+      .createWithDefault(COMET_SCHEMA_EVOLUTION_ENABLED_DEFAULT)
 
   val COMET_ROW_TO_COLUMNAR_ENABLED: ConfigEntry[Boolean] =
     conf("spark.comet.rowToColumnar.enabled")
@@ -382,12 +384,13 @@ object CometConf {
       .createWithDefault(Seq("Range,InMemoryTableScan"))
 
   val COMET_ANSI_MODE_ENABLED: ConfigEntry[Boolean] = conf("spark.comet.ansi.enabled")
+    .internal()
     .doc(
       "Comet does not respect ANSI mode in most cases and by default will not accelerate " +
         "queries when ansi mode is enabled. Enable this setting to test Comet's experimental " +
         "support for ANSI mode. This should not be used in production.")
     .booleanConf
-    .createWithDefault(false)
+    .createWithDefault(COMET_ANSI_MODE_ENABLED_DEFAULT)
 
   val COMET_CAST_ALLOW_INCOMPATIBLE: ConfigEntry[Boolean] =
     conf("spark.comet.cast.allowIncompatible")
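
With `spark.comet.ansi.enabled` now marked `.internal()`, it disappears from the generated user docs (see the `configs.md` change below) but can still be set explicitly for experimentation. A minimal sketch of toggling it in a session, illustrative only: it assumes Comet is on the classpath and loaded as a session extension, and only the `spark.comet.ansi.enabled` key comes from this diff.

```scala
import org.apache.spark.sql.SparkSession

object AnsiToggleSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession
      .builder()
      .master("local[1]")
      .config("spark.sql.ansi.enabled", "true")   // Spark's own ANSI mode
      .config("spark.comet.ansi.enabled", "true") // experimental; not for production
      .getOrCreate()
    spark.sql("SELECT 1 + 1").show()
    spark.stop()
  }
}
```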
@@ -0,0 +1,25 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.apache.comet.shims

trait ShimCometConf {
protected val COMET_SCHEMA_EVOLUTION_ENABLED_DEFAULT = false
protected val COMET_ANSI_MODE_ENABLED_DEFAULT = false
}
@@ -0,0 +1,25 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.apache.comet.shims

trait ShimCometConf {
protected val COMET_SCHEMA_EVOLUTION_ENABLED_DEFAULT = true
protected val COMET_ANSI_MODE_ENABLED_DEFAULT = true
}
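
The two new copies of `ShimCometConf` differ only in their defaults: the first (compiled for pre-4.0 builds) keeps schema evolution and ANSI support off, while the second (compiled under the spark-4.0 profile) turns both on, which is how `object CometConf extends ShimCometConf` picks up version-appropriate defaults at build time. A self-contained sketch of the pattern, with names and layout that are illustrative rather than the project's actual build configuration:

```scala
// Shared code depends only on the trait; the build profile decides which
// concrete source file is compiled alongside it.
trait ShimDefaults {
  def schemaEvolutionDefault: Boolean
  def ansiDefault: Boolean
}

// Shim variant compiled for pre-4.0 builds:
object Spark3Defaults extends ShimDefaults {
  val schemaEvolutionDefault = false
  val ansiDefault = false
}

// Shim variant compiled under the spark-4.0 profile:
object Spark4Defaults extends ShimDefaults {
  val schemaEvolutionDefault = true
  val ansiDefault = true
}
```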
4 changes: 4 additions & 0 deletions docs/source/contributor-guide/development.md
@@ -92,23 +92,27 @@ The plan stability testing framework is located in the `spark` module and can be
 
 ```sh
 ./mvnw -pl spark -Dsuites="org.apache.spark.sql.comet.CometTPCDSV1_4_PlanStabilitySuite" test
+./mvnw -pl spark -Dsuites="org.apache.spark.sql.comet.CometTPCDSV1_4_PlanStabilitySuite" -Pspark-4.0 -nsu test
 ```
 
 and
 ```sh
 ./mvnw -pl spark -Dsuites="org.apache.spark.sql.comet.CometTPCDSV2_7_PlanStabilitySuite" test
+./mvnw -pl spark -Dsuites="org.apache.spark.sql.comet.CometTPCDSV2_7_PlanStabilitySuite" -Pspark-4.0 -nsu test
 ```
 
 If your pull request changes the query plans generated by Comet, you should regenerate the golden files.
 To regenerate the golden files, you can run the following command:
 
 ```sh
 SPARK_GENERATE_GOLDEN_FILES=1 ./mvnw -pl spark -Dsuites="org.apache.spark.sql.comet.CometTPCDSV1_4_PlanStabilitySuite" test
+SPARK_GENERATE_GOLDEN_FILES=1 ./mvnw -pl spark -Dsuites="org.apache.spark.sql.comet.CometTPCDSV1_4_PlanStabilitySuite" -Pspark-4.0 -nsu test
 ```
 
 and
 ```sh
 SPARK_GENERATE_GOLDEN_FILES=1 ./mvnw -pl spark -Dsuites="org.apache.spark.sql.comet.CometTPCDSV2_7_PlanStabilitySuite" test
+SPARK_GENERATE_GOLDEN_FILES=1 ./mvnw -pl spark -Dsuites="org.apache.spark.sql.comet.CometTPCDSV2_7_PlanStabilitySuite" -Pspark-4.0 -nsu test
 ```
 
 ## Benchmark
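
A note on the added flags: `-nsu` is Maven's shorthand for `--no-snapshot-updates`, which stops Maven from re-resolving snapshot dependencies remotely, so the `-pl spark` build can reuse locally installed modules already built with `-Pspark-4.0`.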
1 change: 0 additions & 1 deletion docs/source/user-guide/configs.md
@@ -23,7 +23,6 @@ Comet provides the following configuration settings.
 
 | Config | Description | Default Value |
 |--------|-------------|---------------|
-| spark.comet.ansi.enabled | Comet does not respect ANSI mode in most cases and by default will not accelerate queries when ansi mode is enabled. Enable this setting to test Comet's experimental support for ANSI mode. This should not be used in production. | false |
 | spark.comet.batchSize | The columnar batch size, i.e., the maximum number of rows that a batch can contain. | 8192 |
 | spark.comet.cast.allowIncompatible | Comet is not currently fully compatible with Spark for all cast operations. Set this config to true to allow them anyway. See compatibility guide for more information. | false |
 | spark.comet.columnar.shuffle.async.enabled | Whether to enable asynchronous shuffle for Arrow-based shuffle. By default, this config is false. | false |
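
The `spark.comet.ansi.enabled` row drops out of the generated configuration table, consistent with the config being marked `.internal()` in `CometConf` above; internal configs are presumably excluded when this page is generated.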
@@ -1030,6 +1030,10 @@ object CometSparkSessionExtensions extends Logging {
     org.apache.spark.SPARK_VERSION >= "3.4"
   }
 
+  def isSpark40Plus: Boolean = {
+    org.apache.spark.SPARK_VERSION >= "4.0"
+  }
+
   /** Calculates required memory overhead in MB per executor process for Comet. */
   def getCometMemoryOverheadInMiB(sparkConf: SparkConf): Long = {
     // `spark.executor.memory` default value is 1g
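
Note that `SPARK_VERSION` is compared lexicographically as a string, which is sufficient here because every supported 3.x version string sorts below "4.0". A standalone sketch of the same check (the helper name mirrors the one added above; the assertions are illustrative):

```scala
object VersionCheckSketch {
  // Plain string comparison, as in the helper above: "3.5.1" < "4.0"
  // because '3' < '4', and "4.0.0" >= "4.0" since it extends an equal prefix.
  def isSpark40Plus(version: String): Boolean = version >= "4.0"

  def main(args: Array[String]): Unit = {
    assert(!isSpark40Plus("3.5.1"))
    assert(isSpark40Plus("4.0.0"))
  }
}
```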
@@ -2244,7 +2244,8 @@ object QueryPlanSerde extends Logging with ShimQueryPlanSerde with CometExprShim
   }
 
   def nullIfWhenPrimitive(expression: Expression): Expression = if (isPrimitive(expression)) {
-    new NullIf(expression, Literal.default(expression.dataType)).child
+    val zero = Literal.default(expression.dataType)
+    If(EqualTo(expression, zero), Literal.create(null, expression.dataType), expression)
   } else {
     expression
   }
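
The rewrite builds the null-if expression directly instead of going through the `NullIf` helper and taking its `child`: return null when the value equals the type's default "zero" literal, otherwise return the value unchanged. A rough sketch of the intended semantics in plain Scala, with `Option` standing in for SQL NULL (not Catalyst code):

```scala
object NullIfSketch {
  // Mirrors If(EqualTo(expr, zero), Literal(null), expr) from the diff above.
  def nullIfDefault[T](value: T, zero: T): Option[T] =
    if (value == zero) None else Some(value)

  def main(args: Array[String]): Unit = {
    assert(nullIfDefault(0, 0).isEmpty)     // the default value becomes "null"
    assert(nullIfDefault(7, 0).contains(7)) // everything else passes through
  }
}
```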
(Remaining changed files are not shown here.)
