-
Notifications
You must be signed in to change notification settings - Fork 166
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
build: Enable comet tests with spark-4.0 profile #493
Changes from all commits
a1fff9b
465b828
62b7d2f
8db78cb
02a970a
7251eb2
17a6995
57d6538
d3efeb8
e310eb1
69ca228
3aec9e6
d629df1
65628fb
328705f
b85c712
8dc9dba
9a4b605
8ec2767
396d077
b819e99
728bf8d
8374f74
a4d050b
0cd4870
f91c7be
c146661
8c4bf05
225858f
f472ee3
1cb96dc
7911e52
e258273
c1fd154
80a292b
5999156
3335e4d
f0f8b0c
f5aa2b4
be4c367
1910927
443bee8
8036660
d388d4b
46bddfa
1a8939f
53fd09c
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -29,6 +29,8 @@ import org.apache.spark.network.util.JavaUtils | |
import org.apache.spark.sql.comet.util.Utils | ||
import org.apache.spark.sql.internal.SQLConf | ||
|
||
import org.apache.comet.shims.ShimCometConf | ||
|
||
/** | ||
* Configurations for a Comet application. Mostly inspired by [[SQLConf]] in Spark. | ||
* | ||
|
@@ -41,7 +43,7 @@ import org.apache.spark.sql.internal.SQLConf | |
* which retrieves the config value from the thread-local [[SQLConf]] object. Alternatively, you | ||
* can also explicitly pass a [[SQLConf]] object to the `get` method. | ||
*/ | ||
object CometConf { | ||
object CometConf extends ShimCometConf { | ||
|
||
/** List of all configs that is used for generating documentation */ | ||
val allConfs = new ListBuffer[ConfigEntry[_]] | ||
|
@@ -361,7 +363,7 @@ object CometConf { | |
"column to a long column, a float column to a double column, etc. This is automatically" + | ||
"enabled when reading from Iceberg tables.") | ||
.booleanConf | ||
.createWithDefault(false) | ||
.createWithDefault(COMET_SCHEMA_EVOLUTION_ENABLED_DEFAULT) | ||
|
||
val COMET_ROW_TO_COLUMNAR_ENABLED: ConfigEntry[Boolean] = | ||
conf("spark.comet.rowToColumnar.enabled") | ||
|
@@ -382,12 +384,13 @@ object CometConf { | |
.createWithDefault(Seq("Range,InMemoryTableScan")) | ||
|
||
val COMET_ANSI_MODE_ENABLED: ConfigEntry[Boolean] = conf("spark.comet.ansi.enabled") | ||
.internal() | ||
.doc( | ||
"Comet does not respect ANSI mode in most cases and by default will not accelerate " + | ||
"queries when ansi mode is enabled. Enable this setting to test Comet's experimental " + | ||
"support for ANSI mode. This should not be used in production.") | ||
.booleanConf | ||
.createWithDefault(false) | ||
.createWithDefault(COMET_ANSI_MODE_ENABLED_DEFAULT) | ||
Comment on lines
+387
to
+393
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We may need to mention it is enabled by default for Spark 4.0 now. It could produce incorrect results so we better don't make it implicitly enabled. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Right, Spark 4.0 support is not ready at all. I will send follow up PRs |
||
|
||
val COMET_CAST_ALLOW_INCOMPATIBLE: ConfigEntry[Boolean] = | ||
conf("spark.comet.cast.allowIncompatible") | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
/* | ||
* Licensed to the Apache Software Foundation (ASF) under one | ||
* or more contributor license agreements. See the NOTICE file | ||
* distributed with this work for additional information | ||
* regarding copyright ownership. The ASF licenses this file | ||
* to you under the Apache License, Version 2.0 (the | ||
* "License"); you may not use this file except in compliance | ||
* with the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, | ||
* software distributed under the License is distributed on an | ||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
* KIND, either express or implied. See the License for the | ||
* specific language governing permissions and limitations | ||
* under the License. | ||
*/ | ||
|
||
package org.apache.comet.shims | ||
|
||
trait ShimCometConf { | ||
protected val COMET_SCHEMA_EVOLUTION_ENABLED_DEFAULT = false | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: do we need a trailing _DEFAULT in the name? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I would say so as we have |
||
protected val COMET_ANSI_MODE_ENABLED_DEFAULT = false | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
/* | ||
* Licensed to the Apache Software Foundation (ASF) under one | ||
* or more contributor license agreements. See the NOTICE file | ||
* distributed with this work for additional information | ||
* regarding copyright ownership. The ASF licenses this file | ||
* to you under the Apache License, Version 2.0 (the | ||
* "License"); you may not use this file except in compliance | ||
* with the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, | ||
* software distributed under the License is distributed on an | ||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
* KIND, either express or implied. See the License for the | ||
* specific language governing permissions and limitations | ||
* under the License. | ||
*/ | ||
|
||
package org.apache.comet.shims | ||
|
||
trait ShimCometConf { | ||
protected val COMET_SCHEMA_EVOLUTION_ENABLED_DEFAULT = true | ||
protected val COMET_ANSI_MODE_ENABLED_DEFAULT = true | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do we really want to enable this by default when we know that we are not compatible yet? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I suppose we have to do this otherwise all plans will currently fall back to Spark because ANSI mode is enabled by default in Spark 4. We should really only fall back to Spark for expressions that are actually affected by ANSI support rather than fall back for everything. Perhaps we could add a note to the documentation explaining that ANSI mode in Spark 4 is not fully compatible yet. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I am thinking to use Spark-4.0 as a test bed for ANSI readiness. I will send another PR for enabling Spark tests and if I find any incompatibility, I will disable them per function. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You need it to be enabled for Spark tests, isn't? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -23,7 +23,6 @@ Comet provides the following configuration settings. | |
|
||
| Config | Description | Default Value | | ||
|--------|-------------|---------------| | ||
| spark.comet.ansi.enabled | Comet does not respect ANSI mode in most cases and by default will not accelerate queries when ansi mode is enabled. Enable this setting to test Comet's experimental support for ANSI mode. This should not be used in production. | false | | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This highlights that our configs vary depending on the version we build against and we currently only publish one version of our configs. Ideally we should generate configs per spark version and publish all of them. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I will file an issue for this There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thanks, I plan to associate this config with |
||
| spark.comet.batchSize | The columnar batch size, i.e., the maximum number of rows that a batch can contain. | 8192 | | ||
| spark.comet.cast.allowIncompatible | Comet is not currently fully compatible with Spark for all cast operations. Set this config to true to allow them anyway. See compatibility guide for more information. | false | | ||
| spark.comet.columnar.shuffle.async.enabled | Whether to enable asynchronous shuffle for Arrow-based shuffle. By default, this config is false. | false | | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2201,7 +2201,8 @@ object QueryPlanSerde extends Logging with ShimQueryPlanSerde with CometExprShim | |
} | ||
|
||
def nullIfWhenPrimitive(expression: Expression): Expression = if (isPrimitive(expression)) { | ||
new NullIf(expression, Literal.default(expression.dataType)).child | ||
val zero = Literal.default(expression.dataType) | ||
If(EqualTo(expression, zero), Literal.create(null, expression.dataType), expression) | ||
Comment on lines
+2204
to
+2205
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
} else { | ||
expression | ||
} | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why making it internal?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
As discussed in #493 (comment)
doc is not per profile yet. By making it internal, doc will ignore it.