diff --git a/.github/workflows/maven_test.yml b/.github/workflows/maven_test.yml
index 38c6221247f90..d23cea926a274 100644
--- a/.github/workflows/maven_test.yml
+++ b/.github/workflows/maven_test.yml
@@ -190,18 +190,18 @@ jobs:
export ENABLE_KINESIS_TESTS=0
# Replace with the real module name, for example, connector#kafka-0-10 -> connector/kafka-0-10
export TEST_MODULES=`echo "$MODULES_TO_TEST" | sed -e "s%#%/%g"`
- ./build/mvn $MAVEN_CLI_OPTS -DskipTests -Pyarn -Pkubernetes -Pvolcano -Phive -Phive-thriftserver -Phadoop-cloud -Pspark-ganglia-lgpl -Pkinesis-asl -Djava.version=${JAVA_VERSION/-ea} clean install
+ ./build/mvn $MAVEN_CLI_OPTS -DskipTests -Pyarn -Pkubernetes -Pvolcano -Phive -Phive-thriftserver -Phadoop-cloud -Pjvm-profiler -Pspark-ganglia-lgpl -Pkinesis-asl -Djava.version=${JAVA_VERSION/-ea} clean install
if [[ "$INCLUDED_TAGS" != "" ]]; then
- ./build/mvn $MAVEN_CLI_OPTS -pl "$TEST_MODULES" -Pyarn -Pkubernetes -Pvolcano -Phive -Phive-thriftserver -Phadoop-cloud -Pspark-ganglia-lgpl -Pkinesis-asl -Djava.version=${JAVA_VERSION/-ea} -Dtest.include.tags="$INCLUDED_TAGS" test -fae
+ ./build/mvn $MAVEN_CLI_OPTS -pl "$TEST_MODULES" -Pyarn -Pkubernetes -Pvolcano -Phive -Phive-thriftserver -Phadoop-cloud -Pjvm-profiler -Pspark-ganglia-lgpl -Pkinesis-asl -Djava.version=${JAVA_VERSION/-ea} -Dtest.include.tags="$INCLUDED_TAGS" test -fae
elif [[ "$MODULES_TO_TEST" == "connect" ]]; then
./build/mvn $MAVEN_CLI_OPTS -Dtest.exclude.tags="$EXCLUDED_TAGS" -Djava.version=${JAVA_VERSION/-ea} -pl connector/connect/client/jvm,connector/connect/common,connector/connect/server test -fae
elif [[ "$EXCLUDED_TAGS" != "" ]]; then
- ./build/mvn $MAVEN_CLI_OPTS -pl "$TEST_MODULES" -Pyarn -Pkubernetes -Pvolcano -Phive -Phive-thriftserver -Phadoop-cloud -Pspark-ganglia-lgpl -Pkinesis-asl -Djava.version=${JAVA_VERSION/-ea} -Dtest.exclude.tags="$EXCLUDED_TAGS" test -fae
+ ./build/mvn $MAVEN_CLI_OPTS -pl "$TEST_MODULES" -Pyarn -Pkubernetes -Pvolcano -Phive -Phive-thriftserver -Phadoop-cloud -Pjvm-profiler -Pspark-ganglia-lgpl -Pkinesis-asl -Djava.version=${JAVA_VERSION/-ea} -Dtest.exclude.tags="$EXCLUDED_TAGS" test -fae
elif [[ "$MODULES_TO_TEST" == *"sql#hive-thriftserver"* ]]; then
# To avoid a compilation loop, for the `sql/hive-thriftserver` module, run `clean install` instead
- ./build/mvn $MAVEN_CLI_OPTS -pl "$TEST_MODULES" -Pyarn -Pkubernetes -Pvolcano -Phive -Phive-thriftserver -Phadoop-cloud -Pspark-ganglia-lgpl -Pkinesis-asl -Djava.version=${JAVA_VERSION/-ea} clean install -fae
+ ./build/mvn $MAVEN_CLI_OPTS -pl "$TEST_MODULES" -Pyarn -Pkubernetes -Pvolcano -Phive -Phive-thriftserver -Phadoop-cloud -Pjvm-profiler -Pspark-ganglia-lgpl -Pkinesis-asl -Djava.version=${JAVA_VERSION/-ea} clean install -fae
else
- ./build/mvn $MAVEN_CLI_OPTS -pl "$TEST_MODULES" -Pyarn -Pkubernetes -Pvolcano -Phive -Phive-thriftserver -Pspark-ganglia-lgpl -Phadoop-cloud -Pkinesis-asl -Djava.version=${JAVA_VERSION/-ea} test -fae
+ ./build/mvn $MAVEN_CLI_OPTS -pl "$TEST_MODULES" -Pyarn -Pkubernetes -Pvolcano -Phive -Phive-thriftserver -Pspark-ganglia-lgpl -Phadoop-cloud -Pjvm-profiler -Pkinesis-asl -Djava.version=${JAVA_VERSION/-ea} test -fae
fi
- name: Clean up local Maven repository
run: |
diff --git a/connector/profiler/README.md b/connector/profiler/README.md
index 527f8b487d4d4..d928a47cab7d2 100644
--- a/connector/profiler/README.md
+++ b/connector/profiler/README.md
@@ -23,7 +23,7 @@ Code profiling is currently only supported for
To get maximum profiling information set the following jvm options for the executor :
```
- -XX:+UnlockDiagnosticVMOptions -XX:+DebugNonSafepoints -XX:+PreserveFramePointer
+spark.executor.extraJavaOptions=-XX:+UnlockDiagnosticVMOptions -XX:+DebugNonSafepoints -XX:+PreserveFramePointer
```
For more information on async_profiler see the [Async Profiler Manual](https://krzysztofslusarski.github.io/2022/12/12/async-manual.html)
diff --git a/connector/profiler/pom.xml b/connector/profiler/pom.xml
index 14e5a73e31f14..6b254dbae128c 100644
--- a/connector/profiler/pom.xml
+++ b/connector/profiler/pom.xml
@@ -31,6 +31,9 @@
jar
Spark Profiler
+
+ Enables code profiling of executors based on the async profiler.
+
https://spark.apache.org/
@@ -44,7 +47,8 @@
me.bechberger
ap-loader-all
- 3.0-9
+ ${ap-loader.version}
+ provided
diff --git a/dev/create-release/release-build.sh b/dev/create-release/release-build.sh
index 62d172ef74ca4..75ec98464f3ec 100755
--- a/dev/create-release/release-build.sh
+++ b/dev/create-release/release-build.sh
@@ -201,7 +201,7 @@ SCALA_2_12_PROFILES="-Pscala-2.12"
HIVE_PROFILES="-Phive -Phive-thriftserver"
# Profiles for publishing snapshots and release to Maven Central
# We use Apache Hive 2.3 for publishing
-PUBLISH_PROFILES="$BASE_PROFILES $HIVE_PROFILES -Pspark-ganglia-lgpl -Pkinesis-asl -Phadoop-cloud"
+PUBLISH_PROFILES="$BASE_PROFILES $HIVE_PROFILES -Pspark-ganglia-lgpl -Pkinesis-asl -Phadoop-cloud -Pjvm-profiler"
# Profiles for building binary releases
BASE_RELEASE_PROFILES="$BASE_PROFILES -Psparkr"
diff --git a/dev/test-dependencies.sh b/dev/test-dependencies.sh
index 175f59a700941..048c59f4cec9b 100755
--- a/dev/test-dependencies.sh
+++ b/dev/test-dependencies.sh
@@ -31,7 +31,7 @@ export LC_ALL=C
# NOTE: These should match those in the release publishing script, and be kept in sync with
# dev/create-release/release-build.sh
HADOOP_MODULE_PROFILES="-Phive-thriftserver -Pkubernetes -Pyarn -Phive \
- -Pspark-ganglia-lgpl -Pkinesis-asl -Phadoop-cloud"
+ -Pspark-ganglia-lgpl -Pkinesis-asl -Phadoop-cloud -Pjvm-profiler"
MVN="build/mvn"
HADOOP_HIVE_PROFILES=(
hadoop-3-hive-2.3
diff --git a/docs/building-spark.md b/docs/building-spark.md
index d10dfc9434fec..73fc31610d95d 100644
--- a/docs/building-spark.md
+++ b/docs/building-spark.md
@@ -117,6 +117,13 @@ where `spark-streaming_{{site.SCALA_BINARY_VERSION}}` is the `artifactId` as def
./build/mvn -Pconnect -DskipTests clean package
+## Building with JVM Profiler support
+
+ ./build/mvn -Pjvm-profiler -DskipTests clean package
+
+**Note:** The `jvm-profiler` profile builds the assembly without including the `ap-loader` dependency.
+You can download it manually from the Maven Central repository and use it together with `spark-profiler_{{site.SCALA_BINARY_VERSION}}`.
+
## Continuous Compilation
We use the scala-maven-plugin which supports incremental and continuous compilation. E.g.
diff --git a/pom.xml b/pom.xml
index 05c6f9841e61e..f6f11d94cce32 100644
--- a/pom.xml
+++ b/pom.xml
@@ -297,6 +297,9 @@
1.1.3
6.0.53
+
+ 3.0-9
+
128m
yyyy-MM-dd HH:mm:ss z