Merge remote-tracking branch 'upstream/main' into spark-4.0-spark-test
kazuyukitanimura committed Jun 5, 2024
2 parents b0d6d04 + c819bc0 commit bdcd186
Showing 9 changed files with 1,197 additions and 477 deletions.
3 changes: 3 additions & 0 deletions Makefile
@@ -77,6 +77,9 @@ release-linux: clean
 release:
 	cd core && RUSTFLAGS="-Ctarget-cpu=native" cargo build --release
 	./mvnw install -Prelease -DskipTests $(PROFILES)
+release-nogit:
+	cd core && RUSTFLAGS="-Ctarget-cpu=native" cargo build --features nightly --release
+	./mvnw install -Prelease -DskipTests $(PROFILES) -Dmaven.gitcommitid.skip=true
 benchmark-%: clean release
 	cd spark && COMET_CONF_DIR=$(shell pwd)/conf MAVEN_OPTS='-Xmx20g' ../mvnw exec:java -Dexec.mainClass="$*" -Dexec.classpathScope="test" -Dexec.cleanupDaemonThreads="false" -Dexec.args="$(filter-out $@,$(MAKECMDGOALS))" $(PROFILES)
 .DEFAULT:
6 changes: 6 additions & 0 deletions docs/source/user-guide/installation.md
@@ -57,6 +57,12 @@ Note that the project builds for Scala 2.12 by default but can be built for Scal
 make release PROFILES="-Pspark-3.4 -Pscala-2.13"
 ```
 
+To build Comet from a source distribution in an isolated environment without access to `github.com`, it is necessary to disable `git-commit-id-maven-plugin`; otherwise the build fails with errors about Git being unavailable. In that case, use:
+
+```console
+make release-nogit PROFILES="-Pspark-3.4"
+```
+
 ## Run Spark Shell with Comet enabled
 
 Make sure `SPARK_HOME` points to the same Spark version as Comet was built for.
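For reference, the new `release-nogit` target in the Makefile hunk above expands to roughly the following commands, with `-Pspark-3.4` substituted for `$(PROFILES)` as in the documented example; the subshell around the first command reflects that each Makefile recipe line runs in its own shell from the repository root:

```console
(cd core && RUSTFLAGS="-Ctarget-cpu=native" cargo build --features nightly --release)
./mvnw install -Prelease -DskipTests -Pspark-3.4 -Dmaven.gitcommitid.skip=true
```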
2 changes: 1 addition & 1 deletion docs/source/user-guide/overview.md
@@ -29,7 +29,7 @@ Comet aims to support:
 - a native Parquet implementation, including both reader and writer
 - full implementation of Spark operators, including
   Filter/Project/Aggregation/Join/Exchange etc.
-- full implementation of Spark built-in expressions
+- full implementation of Spark built-in expressions.
 - a UDF framework for users to migrate their existing UDF to native
 
 ## Architecture
2 changes: 2 additions & 0 deletions docs/source/user-guide/tuning.md
@@ -39,6 +39,8 @@ It must be set before the Spark context is created. You can enable or disable Co
 at runtime by setting `spark.comet.exec.shuffle.enabled` to `true` or `false`.
 Once it is disabled, Comet will fall back to the default Spark shuffle manager.
 
+> **_NOTE:_** At the moment Comet Shuffle is not compatible with Spark AQE partition coalescing. To disable partition coalescing, set `spark.sql.adaptive.coalescePartitions.enabled` to `false`.
+
 ### Shuffle Mode
 
 Comet provides three shuffle modes: Columnar Shuffle, Native Shuffle and Auto Mode.
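Putting the note above into practice, here is a minimal sketch of launching a Comet-enabled shell with AQE partition coalescing disabled. The `spark.plugins` value shown is the usual Comet plugin class, but exact settings may vary by Comet version, and additional shuffle configuration (such as the Comet shuffle manager) may be required:

```console
$SPARK_HOME/bin/spark-shell \
  --conf spark.plugins=org.apache.spark.CometPlugin \
  --conf spark.comet.exec.shuffle.enabled=true \
  --conf spark.sql.adaptive.coalescePartitions.enabled=false
```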
836 changes: 417 additions & 419 deletions docs/spark_builtin_expr_coverage.txt

Large diffs are not rendered by default.

