From 553e1b85c42a60c082d33f7b9df53b0495893286 Mon Sep 17 00:00:00 2001 From: panbingkun Date: Tue, 7 May 2024 17:23:35 -0700 Subject: [PATCH] [SPARK-48152][BUILD] Make `spark-profiler` as a part of release and publish to maven central repo ### What changes were proposed in this pull request? This PR aims to: - make the module `spark-profiler` a part of the Spark release - publish the module `spark-profiler` to the `maven central repository` - add instructions on how to build with `spark-profiler` support to the doc `docs/building-spark.md` ### Why are the changes needed? 1. The modules released in the current daily `spark-4.0.0` do not include `spark-profiler`. I believe that, according to the current logic, `spark-profiler` will not appear in the future official version of Spark. 2. Align with the build descriptions of other modules in the doc `docs/building-spark.md`, e.g.: image ### Does this PR introduce _any_ user-facing change? Yes, this makes it easy for users to use `spark-profiler` in the future version of Spark, instead of manually compiling `spark-profiler` from source code. ### How was this patch tested? - Pass GA. - It is necessary to observe whether the daily snapshots of `spark-profiler_2.13` are generated at https://repository.apache.org/content/repositories/snapshots/org/apache/spark/spark-profiler_2.13/4.0.0-SNAPSHOT/ ### Was this patch authored or co-authored using generative AI tooling? No. Closes #46402 from panbingkun/jvm_profiler. 
Authored-by: panbingkun Signed-off-by: Dongjoon Hyun --- .github/workflows/maven_test.yml | 10 +++++----- connector/profiler/README.md | 2 +- connector/profiler/pom.xml | 6 +++++- dev/create-release/release-build.sh | 2 +- dev/test-dependencies.sh | 2 +- docs/building-spark.md | 7 +++++++ pom.xml | 3 +++ 7 files changed, 23 insertions(+), 9 deletions(-) diff --git a/.github/workflows/maven_test.yml b/.github/workflows/maven_test.yml index 38c6221247f90..d23cea926a274 100644 --- a/.github/workflows/maven_test.yml +++ b/.github/workflows/maven_test.yml @@ -190,18 +190,18 @@ jobs: export ENABLE_KINESIS_TESTS=0 # Replace with the real module name, for example, connector#kafka-0-10 -> connector/kafka-0-10 export TEST_MODULES=`echo "$MODULES_TO_TEST" | sed -e "s%#%/%g"` - ./build/mvn $MAVEN_CLI_OPTS -DskipTests -Pyarn -Pkubernetes -Pvolcano -Phive -Phive-thriftserver -Phadoop-cloud -Pspark-ganglia-lgpl -Pkinesis-asl -Djava.version=${JAVA_VERSION/-ea} clean install + ./build/mvn $MAVEN_CLI_OPTS -DskipTests -Pyarn -Pkubernetes -Pvolcano -Phive -Phive-thriftserver -Phadoop-cloud -Pjvm-profiler -Pspark-ganglia-lgpl -Pkinesis-asl -Djava.version=${JAVA_VERSION/-ea} clean install if [[ "$INCLUDED_TAGS" != "" ]]; then - ./build/mvn $MAVEN_CLI_OPTS -pl "$TEST_MODULES" -Pyarn -Pkubernetes -Pvolcano -Phive -Phive-thriftserver -Phadoop-cloud -Pspark-ganglia-lgpl -Pkinesis-asl -Djava.version=${JAVA_VERSION/-ea} -Dtest.include.tags="$INCLUDED_TAGS" test -fae + ./build/mvn $MAVEN_CLI_OPTS -pl "$TEST_MODULES" -Pyarn -Pkubernetes -Pvolcano -Phive -Phive-thriftserver -Phadoop-cloud -Pjvm-profiler -Pspark-ganglia-lgpl -Pkinesis-asl -Djava.version=${JAVA_VERSION/-ea} -Dtest.include.tags="$INCLUDED_TAGS" test -fae elif [[ "$MODULES_TO_TEST" == "connect" ]]; then ./build/mvn $MAVEN_CLI_OPTS -Dtest.exclude.tags="$EXCLUDED_TAGS" -Djava.version=${JAVA_VERSION/-ea} -pl connector/connect/client/jvm,connector/connect/common,connector/connect/server test -fae elif [[ "$EXCLUDED_TAGS" != "" ]]; then 
- ./build/mvn $MAVEN_CLI_OPTS -pl "$TEST_MODULES" -Pyarn -Pkubernetes -Pvolcano -Phive -Phive-thriftserver -Phadoop-cloud -Pspark-ganglia-lgpl -Pkinesis-asl -Djava.version=${JAVA_VERSION/-ea} -Dtest.exclude.tags="$EXCLUDED_TAGS" test -fae + ./build/mvn $MAVEN_CLI_OPTS -pl "$TEST_MODULES" -Pyarn -Pkubernetes -Pvolcano -Phive -Phive-thriftserver -Phadoop-cloud -Pjvm-profiler -Pspark-ganglia-lgpl -Pkinesis-asl -Djava.version=${JAVA_VERSION/-ea} -Dtest.exclude.tags="$EXCLUDED_TAGS" test -fae elif [[ "$MODULES_TO_TEST" == *"sql#hive-thriftserver"* ]]; then # To avoid a compilation loop, for the `sql/hive-thriftserver` module, run `clean install` instead - ./build/mvn $MAVEN_CLI_OPTS -pl "$TEST_MODULES" -Pyarn -Pkubernetes -Pvolcano -Phive -Phive-thriftserver -Phadoop-cloud -Pspark-ganglia-lgpl -Pkinesis-asl -Djava.version=${JAVA_VERSION/-ea} clean install -fae + ./build/mvn $MAVEN_CLI_OPTS -pl "$TEST_MODULES" -Pyarn -Pkubernetes -Pvolcano -Phive -Phive-thriftserver -Phadoop-cloud -Pjvm-profiler -Pspark-ganglia-lgpl -Pkinesis-asl -Djava.version=${JAVA_VERSION/-ea} clean install -fae else - ./build/mvn $MAVEN_CLI_OPTS -pl "$TEST_MODULES" -Pyarn -Pkubernetes -Pvolcano -Phive -Phive-thriftserver -Pspark-ganglia-lgpl -Phadoop-cloud -Pkinesis-asl -Djava.version=${JAVA_VERSION/-ea} test -fae + ./build/mvn $MAVEN_CLI_OPTS -pl "$TEST_MODULES" -Pyarn -Pkubernetes -Pvolcano -Phive -Phive-thriftserver -Pspark-ganglia-lgpl -Phadoop-cloud -Pjvm-profiler -Pkinesis-asl -Djava.version=${JAVA_VERSION/-ea} test -fae fi - name: Clean up local Maven repository run: | diff --git a/connector/profiler/README.md b/connector/profiler/README.md index 527f8b487d4d4..d928a47cab7d2 100644 --- a/connector/profiler/README.md +++ b/connector/profiler/README.md @@ -23,7 +23,7 @@ Code profiling is currently only supported for To get maximum profiling information set the following jvm options for the executor : ``` - -XX:+UnlockDiagnosticVMOptions -XX:+DebugNonSafepoints -XX:+PreserveFramePointer 
+spark.executor.extraJavaOptions=-XX:+UnlockDiagnosticVMOptions -XX:+DebugNonSafepoints -XX:+PreserveFramePointer ``` For more information on async_profiler see the [Async Profiler Manual](https://krzysztofslusarski.github.io/2022/12/12/async-manual.html) diff --git a/connector/profiler/pom.xml b/connector/profiler/pom.xml index 14e5a73e31f14..6b254dbae128c 100644 --- a/connector/profiler/pom.xml +++ b/connector/profiler/pom.xml @@ -31,6 +31,9 @@ jar Spark Profiler + + Enables code profiling of executors based on the the async profiler. + https://spark.apache.org/ @@ -44,7 +47,8 @@ me.bechberger ap-loader-all - 3.0-9 + ${ap-loader.version} + provided diff --git a/dev/create-release/release-build.sh b/dev/create-release/release-build.sh index 62d172ef74ca4..75ec98464f3ec 100755 --- a/dev/create-release/release-build.sh +++ b/dev/create-release/release-build.sh @@ -201,7 +201,7 @@ SCALA_2_12_PROFILES="-Pscala-2.12" HIVE_PROFILES="-Phive -Phive-thriftserver" # Profiles for publishing snapshots and release to Maven Central # We use Apache Hive 2.3 for publishing -PUBLISH_PROFILES="$BASE_PROFILES $HIVE_PROFILES -Pspark-ganglia-lgpl -Pkinesis-asl -Phadoop-cloud" +PUBLISH_PROFILES="$BASE_PROFILES $HIVE_PROFILES -Pspark-ganglia-lgpl -Pkinesis-asl -Phadoop-cloud -Pjvm-profiler" # Profiles for building binary releases BASE_RELEASE_PROFILES="$BASE_PROFILES -Psparkr" diff --git a/dev/test-dependencies.sh b/dev/test-dependencies.sh index 175f59a700941..048c59f4cec9b 100755 --- a/dev/test-dependencies.sh +++ b/dev/test-dependencies.sh @@ -31,7 +31,7 @@ export LC_ALL=C # NOTE: These should match those in the release publishing script, and be kept in sync with # dev/create-release/release-build.sh HADOOP_MODULE_PROFILES="-Phive-thriftserver -Pkubernetes -Pyarn -Phive \ - -Pspark-ganglia-lgpl -Pkinesis-asl -Phadoop-cloud" + -Pspark-ganglia-lgpl -Pkinesis-asl -Phadoop-cloud -Pjvm-profiler" MVN="build/mvn" HADOOP_HIVE_PROFILES=( hadoop-3-hive-2.3 diff --git a/docs/building-spark.md 
b/docs/building-spark.md index d10dfc9434fec..73fc31610d95d 100644 --- a/docs/building-spark.md +++ b/docs/building-spark.md @@ -117,6 +117,13 @@ where `spark-streaming_{{site.SCALA_BINARY_VERSION}}` is the `artifactId` as def ./build/mvn -Pconnect -DskipTests clean package +## Building with JVM Profile support + + ./build/mvn -Pjvm-profiler -DskipTests clean package + +**Note:** The `jvm-profiler` profile builds the assembly without including the dependency `ap-loader`, +you can download it manually from maven central repo and use it together with `spark-profiler_{{site.SCALA_BINARY_VERSION}}`. + ## Continuous Compilation We use the scala-maven-plugin which supports incremental and continuous compilation. E.g. diff --git a/pom.xml b/pom.xml index 05c6f9841e61e..f6f11d94cce32 100644 --- a/pom.xml +++ b/pom.xml @@ -297,6 +297,9 @@ 1.1.3 6.0.53 + + 3.0-9 + 128m yyyy-MM-dd HH:mm:ss z