From a3dd8df22f016776f227721a31bfaa8bf730ce03 Mon Sep 17 00:00:00 2001 From: advancedxy Date: Sun, 3 Mar 2024 13:58:10 +0800 Subject: [PATCH] build: Support CI pipelines for Spark 3.2, 3.3 and 3.4 (#153) --- .github/actions/java-test/action.yaml | 11 ++- .github/workflows/pr_build.yml | 80 ++++++++++++++++--- .../comet/CometSparkSessionExtensions.scala | 4 + .../apache/comet/CometExpressionSuite.scala | 3 +- .../apache/comet/exec/CometExecSuite.scala | 3 + .../org/apache/spark/sql/CometTestBase.scala | 2 +- 6 files changed, 90 insertions(+), 13 deletions(-) diff --git a/.github/actions/java-test/action.yaml b/.github/actions/java-test/action.yaml index 6c3af7998..4b594e648 100644 --- a/.github/actions/java-test/action.yaml +++ b/.github/actions/java-test/action.yaml @@ -15,6 +15,13 @@ # specific language governing permissions and limitations # under the License. +name: "Java Test" +description: "Run Java tests" +inputs: + MAVEN_OPTS: + description: 'Maven options passed to the mvn command' + required: false + default: '' runs: using: "composite" steps: @@ -37,9 +44,9 @@ runs: - name: Run Maven compile shell: bash run: | - ./mvnw -B compile test-compile scalafix:scalafix -Psemanticdb + ./mvnw -B compile test-compile scalafix:scalafix -Psemanticdb ${{ inputs.MAVEN_OPTS }} - name: Run tests shell: bash run: | - SPARK_HOME=`pwd` ./mvnw -B clean install + SPARK_HOME=`pwd` ./mvnw -B clean install ${{ inputs.MAVEN_OPTS }} diff --git a/.github/workflows/pr_build.yml b/.github/workflows/pr_build.yml index d905095e4..b322c8924 100644 --- a/.github/workflows/pr_build.yml +++ b/.github/workflows/pr_build.yml @@ -44,13 +44,14 @@ jobs: os: [ubuntu-latest] java_version: [8, 11, 17] test-target: [rust, java] + spark-version: ['3.4'] is_push_event: - ${{ github.event_name == 'push' }} exclude: # exclude java 11 for pull_request event - java_version: 11 is_push_event: false fail-fast: false - name: ${{ matrix.test-target }} test on ${{ matrix.os }} with java ${{ matrix.java_version }} + name: ${{ matrix.os }}/java ${{ matrix.java_version }}-spark-${{matrix.spark-version}}/${{ matrix.test-target }} runs-on: ${{ matrix.os }} container: image: amd64/rust @@ -61,14 +62,43 @@ jobs: with: rust-version: ${{env.RUST_VERSION}} jdk-version: ${{ matrix.java_version }} - - - uses: actions/checkout@v4 - if: matrix.test-target == 'rust' name: Rust test steps uses: ./.github/actions/rust-test - if: matrix.test-target == 'java' name: Java test steps uses: ./.github/actions/java-test + with: + MAVEN_OPTS: -Pspark-${{ matrix.spark-version }} + + linux-test-with-old-spark: + strategy: + matrix: + os: [ubuntu-latest] + java_version: [8, 11, 17] + test-target: [java] + spark-version: ['3.2', '3.3'] + exclude: + - java_version: 17 + spark-version: '3.2' + - java_version: 11 + spark-version: '3.2' + fail-fast: false + name: ${{ matrix.os }}/java ${{ matrix.java_version }}-spark-${{matrix.spark-version}}/${{ matrix.test-target }} + runs-on: ${{ matrix.os }} + container: + image: amd64/rust + steps: + - uses: actions/checkout@v4 + - name: Setup Rust & Java toolchain + uses: ./.github/actions/setup-builder + with: + rust-version: ${{env.RUST_VERSION}} + jdk-version: ${{ matrix.java_version }} + - name: Java test steps + uses: ./.github/actions/java-test + with: + MAVEN_OPTS: -Pspark-${{ matrix.spark-version }} macos-test: strategy: @@ -76,9 +106,10 @@ jobs: os: [macos-13] java_version: [8, 11, 17] test-target: [rust, java] + spark-version: ['3.4'] fail-fast: false if: github.event_name == 'push' - name: ${{ matrix.test-target }} test on ${{ matrix.os }} with java ${{ matrix.java_version }} + name: ${{ matrix.os }}/java ${{ matrix.java_version }}-spark-${{matrix.spark-version}}/${{ matrix.test-target }} runs-on: ${{ matrix.os }} steps: - uses: actions/checkout@v4 @@ -87,27 +118,28 @@ jobs: with: rust-version: ${{env.RUST_VERSION}} jdk-version: ${{ matrix.java_version }} - - - uses: actions/checkout@v4 - if: matrix.test-target == 'rust' name: Rust test steps uses: ./.github/actions/rust-test - if: matrix.test-target == 'java' name: Java test steps uses: ./.github/actions/java-test + with: + MAVEN_OPTS: -Pspark-${{ matrix.spark-version }} macos-aarch64-test: strategy: matrix: java_version: [8, 11, 17] test-target: [rust, java] + spark-version: ['3.4'] is_push_event: - ${{ github.event_name == 'push' }} exclude: # exclude java 11 for pull_request event - java_version: 11 is_push_event: false fail-fast: false - name: ${{ matrix.test-target }} test on macos-aarch64 with java ${{ matrix.java_version }} + name: macos-14(Silicon)/java ${{ matrix.java_version }}-spark-${{matrix.spark-version}}/${{ matrix.test-target }} runs-on: macos-14 steps: - uses: actions/checkout@v4 @@ -118,14 +150,44 @@ jobs: jdk-version: ${{ matrix.java_version }} jdk-architecture: aarch64 protoc-architecture: aarch_64 - - - uses: actions/checkout@v4 - if: matrix.test-target == 'rust' name: Rust test steps uses: ./.github/actions/rust-test - if: matrix.test-target == 'java' name: Java test steps uses: ./.github/actions/java-test + with: + MAVEN_OPTS: -Pspark-${{ matrix.spark-version }} + + macos-aarch64-test-with-old-spark: + strategy: + matrix: + java_version: [8, 17] + test-target: [java] + spark-version: ['3.2', '3.3'] + exclude: + - java_version: 17 + spark-version: '3.2' + - java_version: 8 + spark-version: '3.3' + fail-fast: false + name: macos-14(Silicon)/java ${{ matrix.java_version }}-spark-${{matrix.spark-version}}/${{ matrix.test-target }} + runs-on: macos-14 + steps: + - uses: actions/checkout@v4 + - name: Setup Rust & Java toolchain + uses: ./.github/actions/setup-macos-builder + with: + rust-version: ${{env.RUST_VERSION}} + jdk-version: ${{ matrix.java_version }} + jdk-architecture: aarch64 + protoc-architecture: aarch_64 + - if: matrix.test-target == 'java' + name: Java test steps + uses: ./.github/actions/java-test + with: + MAVEN_OPTS: -Pspark-${{ matrix.spark-version }} + check-pr-title: runs-on: ubuntu-latest steps: diff --git a/spark/src/main/scala/org/apache/comet/CometSparkSessionExtensions.scala b/spark/src/main/scala/org/apache/comet/CometSparkSessionExtensions.scala index dae9f3fd5..8037f5559 100644 --- a/spark/src/main/scala/org/apache/comet/CometSparkSessionExtensions.scala +++ b/spark/src/main/scala/org/apache/comet/CometSparkSessionExtensions.scala @@ -602,6 +602,10 @@ object CometSparkSessionExtensions extends Logging { org.apache.spark.SPARK_VERSION.matches("3\\.2\\..*") } + def isSpark33Plus: Boolean = { + org.apache.spark.SPARK_VERSION >= "3.3" + } + /** Used for operations that are available in Spark 3.4+ */ def isSpark34Plus: Boolean = { org.apache.spark.SPARK_VERSION >= "3.4" diff --git a/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala b/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala index 2609bd3ef..cbb77b9ff 100644 --- a/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala +++ b/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala @@ -29,7 +29,7 @@ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.internal.SQLConf.SESSION_LOCAL_TIMEZONE import org.apache.spark.sql.types.{Decimal, DecimalType, StructType} -import org.apache.comet.CometSparkSessionExtensions.{isSpark32, isSpark34Plus} +import org.apache.comet.CometSparkSessionExtensions.{isSpark32, isSpark33Plus, isSpark34Plus} class CometExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelper { import testImplicits._ @@ -393,6 +393,7 @@ class CometExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelper { } test("date_trunc with format array") { + assume(isSpark33Plus, "TimestampNTZ is supported in Spark 3.3+, See SPARK-36182") val numRows = 1000 Seq(true, false).foreach { dictionaryEnabled => withTempDir { dir => diff --git a/spark/src/test/scala/org/apache/comet/exec/CometExecSuite.scala b/spark/src/test/scala/org/apache/comet/exec/CometExecSuite.scala index d7434d518..0ed719fab 100644 --- a/spark/src/test/scala/org/apache/comet/exec/CometExecSuite.scala +++ b/spark/src/test/scala/org/apache/comet/exec/CometExecSuite.scala @@ -72,6 +72,7 @@ class CometExecSuite extends CometTestBase { } test("CometBroadcastExchangeExec") { + assume(isSpark34Plus, "ChunkedByteBuffer is not serializable before Spark 3.4+") withSQLConf(CometConf.COMET_EXEC_BROADCAST_ENABLED.key -> "true") { withParquetTable((0 until 5).map(i => (i, i + 1)), "tbl_a") { withParquetTable((0 until 5).map(i => (i, i + 1)), "tbl_b") { @@ -97,6 +98,7 @@ class CometExecSuite extends CometTestBase { } test("CometBroadcastExchangeExec: empty broadcast") { + assume(isSpark34Plus, "ChunkedByteBuffer is not serializable before Spark 3.4+") withSQLConf(CometConf.COMET_EXEC_BROADCAST_ENABLED.key -> "true") { withParquetTable((0 until 5).map(i => (i, i + 1)), "tbl_a") { withParquetTable((0 until 5).map(i => (i, i + 1)), "tbl_b") { @@ -116,6 +118,7 @@ class CometExecSuite extends CometTestBase { } test("CometExec.executeColumnarCollectIterator can collect ColumnarBatch results") { + assume(isSpark34Plus, "ChunkedByteBuffer is not serializable before Spark 3.4+") withSQLConf( CometConf.COMET_EXEC_ENABLED.key -> "true", CometConf.COMET_EXEC_ALL_OPERATOR_ENABLED.key -> "true") { diff --git a/spark/src/test/scala/org/apache/spark/sql/CometTestBase.scala b/spark/src/test/scala/org/apache/spark/sql/CometTestBase.scala index 0d7904c01..38a8d7d2f 100644 --- a/spark/src/test/scala/org/apache/spark/sql/CometTestBase.scala +++ b/spark/src/test/scala/org/apache/spark/sql/CometTestBase.scala @@ -183,7 +183,7 @@ abstract class CometTestBase protected def checkPlanContains(plan: SparkPlan, includePlans: Class[_]*): Unit = { includePlans.foreach { case planClass => - if (!plan.exists(op => planClass.isAssignableFrom(op.getClass))) { + if (plan.find(op => planClass.isAssignableFrom(op.getClass)).isEmpty) { assert( false, s"Expected plan to contain ${planClass.getSimpleName}, but not.\n" +