diff --git a/.github/workflows/gluten.yml b/.github/workflows/gluten.yml new file mode 100644 index 00000000000..38312915432 --- /dev/null +++ b/.github/workflows/gluten.yml @@ -0,0 +1,122 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +name: Gluten CI + +on: + schedule: + - cron: 0 4 * * * + +env: + MVN_OPT: -Dmaven.javadoc.skip=true -Drat.skip=true -Dscalastyle.skip=true -Dspotless.check.skip -Dorg.slf4j.simpleLogger.defaultLogLevel=warn -Pjdbc-shaded,gen-policy -Dmaven.plugin.download.cache.path=/tmp/engine-archives + +jobs: + gluten-build: + name: Build Gluten + runs-on: ubuntu-22.04 + steps: + - uses: actions/checkout@v4 + - name: Tune Runner VM + uses: ./.github/actions/tune-runner-vm + - name: Update and Upgrade + run: sudo apt-get update && sudo apt-get upgrade -y + - name: Install dependencies + run: | + sudo apt-get install -y software-properties-common + sudo apt-get install -y libunwind-dev build-essential cmake libssl-dev libre2-dev libcurl4-openssl-dev clang lldb lld libz-dev git ninja-build uuid-dev + - name: Setup JDK 8 + uses: actions/setup-java@v3 + with: + distribution: temurin + java-version: 8 + cache: 'maven' + check-latest: false + - name: Setup Maven + uses: ./.github/actions/setup-maven + - name: Get gluten cache date + id: date + run: echo "date=$(date +'%Y-%m-%d')" >> $GITHUB_OUTPUT + - name: Check gluten cache + id: gluten-cache + uses: actions/cache@v3 + with: + path: gluten/package/target/ + key: gluten_package_${{ steps.date.outputs.date }} + - name: Build gluten project + run: | + if [[ "${{ steps.gluten-cache.outputs.cache-hit }}" != 'true' ]]; then + git clone https://github.com/oap-project/gluten.git + cd gluten + ./dev/buildbundle-veloxbe.sh + fi + - uses: actions/cache@v3 + if: steps.gluten-cache.outputs.cache-hit != 'true' + with: + path: gluten/package/target/ + key: gluten_package_${{ steps.date.outputs.date }} + + gluten-test: + name: Gluten TPC-H/DS Test + needs: gluten-build + runs-on: ubuntu-22.04 + strategy: + fail-fast: false + matrix: + module: [ "extensions/spark/kyuubi-spark-connector-tpcds", "extensions/spark/kyuubi-spark-connector-tpch" ] + steps: + - uses: actions/checkout@v4 + - name: Tune Runner VM + uses: ./.github/actions/tune-runner-vm + - name: Update and Upgrade + run: sudo apt-get update && sudo apt-get upgrade -y + - name: Install dependencies + run: | + sudo apt-get install -y software-properties-common + sudo apt-get install -y libunwind-dev build-essential cmake libssl-dev libre2-dev libcurl4-openssl-dev clang lldb lld libz-dev git ninja-build uuid-dev + sudo apt-get install -y libsnappy-dev libthrift-dev libboost-all-dev libgflags-dev libgoogle-glog-dev + - name: Cache Engine Archives + uses: ./.github/actions/cache-engine-archives + - name: Get gluten cache date + id: date + run: echo 
"date=$(date +'%Y-%m-%d')" >> $GITHUB_OUTPUT + - name: Check gluten cache + id: gluten-cache + uses: actions/cache@v3 + with: + path: gluten/package/target/ + key: gluten_package_${{ steps.date.outputs.date }} + - name: Cache Gluten Package + uses: actions/cache@v3 + with: + path: gluten/package/target/ + key: gluten_package + - name: Setup JDK 8 + uses: actions/setup-java@v3 + with: + distribution: temurin + java-version: 8 + cache: 'maven' + check-latest: false + - name: Setup Maven + uses: ./.github/actions/setup-maven + - name: Run TPC-H/DS Test + run: | + TEST_MODULES=${{ matrix.module }} + ./build/mvn ${MVN_OPT} -pl ${TEST_MODULES} -am clean install -DskipTests -Pgluten -Pspark-3.4 + ./build/mvn ${MVN_OPT} -pl ${TEST_MODULES} -am -Pgluten -Pspark-3.4 test \ + -Dmaven.plugin.scalatest.exclude.tags='' \ + -Dtest=none -Dmaven.plugin.scalatest.include.tags='org.apache.kyuubi.tags.GlutenTest' diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml index 6c12700d831..4608ec45e27 100644 --- a/.github/workflows/master.yml +++ b/.github/workflows/master.yml @@ -60,22 +60,22 @@ jobs: - java: 8 spark: '3.4' spark-archive: '-Dspark.archive.mirror=https://archive.apache.org/dist/spark/spark-3.1.3 -Dspark.archive.name=spark-3.1.3-bin-hadoop3.2.tgz -Pzookeeper-3.6' - exclude-tags: '-Dmaven.plugin.scalatest.exclude.tags=org.scalatest.tags.Slow,org.apache.kyuubi.tags.DeltaTest,org.apache.kyuubi.tags.IcebergTest,org.apache.kyuubi.tags.DeltaTest,org.apache.kyuubi.tags.HudiTest,org.apache.kyuubi.tags.SparkLocalClusterTest' + exclude-tags: '-Dmaven.plugin.scalatest.exclude.tags=org.scalatest.tags.Slow,org.apache.kyuubi.tags.GlutenTest,org.apache.kyuubi.tags.DeltaTest,org.apache.kyuubi.tags.IcebergTest,org.apache.kyuubi.tags.DeltaTest,org.apache.kyuubi.tags.HudiTest,org.apache.kyuubi.tags.SparkLocalClusterTest' comment: 'verify-on-spark-3.1-binary' - java: 8 spark: '3.4' spark-archive: '-Dspark.archive.mirror=https://archive.apache.org/dist/spark/spark-3.2.4 -Dspark.archive.name=spark-3.2.4-bin-hadoop3.2.tgz -Pzookeeper-3.6' - exclude-tags: '-Dmaven.plugin.scalatest.exclude.tags=org.scalatest.tags.Slow,org.apache.kyuubi.tags.DeltaTest,org.apache.kyuubi.tags.IcebergTest,org.apache.kyuubi.tags.DeltaTest,org.apache.kyuubi.tags.HudiTest,org.apache.kyuubi.tags.SparkLocalClusterTest' + exclude-tags: '-Dmaven.plugin.scalatest.exclude.tags=org.scalatest.tags.Slow,org.apache.kyuubi.tags.GlutenTest,org.apache.kyuubi.tags.DeltaTest,org.apache.kyuubi.tags.IcebergTest,org.apache.kyuubi.tags.DeltaTest,org.apache.kyuubi.tags.HudiTest,org.apache.kyuubi.tags.SparkLocalClusterTest' comment: 'verify-on-spark-3.2-binary' - java: 8 spark: '3.4' spark-archive: '-Dspark.archive.mirror=https://archive.apache.org/dist/spark/spark-3.3.3 -Dspark.archive.name=spark-3.3.3-bin-hadoop3.tgz -Pzookeeper-3.6' - exclude-tags: '-Dmaven.plugin.scalatest.exclude.tags=org.scalatest.tags.Slow,org.apache.kyuubi.tags.DeltaTest,org.apache.kyuubi.tags.IcebergTest,org.apache.kyuubi.tags.DeltaTest,org.apache.kyuubi.tags.HudiTest,org.apache.kyuubi.tags.SparkLocalClusterTest' + exclude-tags: '-Dmaven.plugin.scalatest.exclude.tags=org.scalatest.tags.Slow,org.apache.kyuubi.tags.GlutenTest,org.apache.kyuubi.tags.DeltaTest,org.apache.kyuubi.tags.IcebergTest,org.apache.kyuubi.tags.DeltaTest,org.apache.kyuubi.tags.HudiTest,org.apache.kyuubi.tags.SparkLocalClusterTest' comment: 'verify-on-spark-3.3-binary' - java: 8 spark: '3.4' spark-archive: '-Dspark.archive.mirror=https://archive.apache.org/dist/spark/spark-3.5.0 
-Dspark.archive.name=spark-3.5.0-bin-hadoop3.tgz -Pzookeeper-3.6' - exclude-tags: '-Dmaven.plugin.scalatest.exclude.tags=org.scalatest.tags.Slow,org.apache.kyuubi.tags.DeltaTest,org.apache.kyuubi.tags.IcebergTest,org.apache.kyuubi.tags.PaimonTest,org.apache.kyuubi.tags.SparkLocalClusterTest' + exclude-tags: '-Dmaven.plugin.scalatest.exclude.tags=org.scalatest.tags.Slow,org.apache.kyuubi.tags.GlutenTest,org.apache.kyuubi.tags.DeltaTest,org.apache.kyuubi.tags.IcebergTest,org.apache.kyuubi.tags.PaimonTest,org.apache.kyuubi.tags.SparkLocalClusterTest' comment: 'verify-on-spark-3.5-binary' exclude: # SPARK-33772: Spark supports JDK 17 since 3.3.0 diff --git a/docs/deployment/spark/gluten.md b/docs/deployment/spark/gluten.md new file mode 100644 index 00000000000..76e21290765 --- /dev/null +++ b/docs/deployment/spark/gluten.md @@ -0,0 +1,52 @@ + + + +# Gluten + +Gluten is a Spark plugin developed by Intel, designed to accelerate Apache Spark with native libraries. Currently, only CentOS 7/8 and Ubuntu 20.04/22.04, along with Spark 3.2/3.3/3.4, are supported. Users can follow the steps below to use Gluten with the Velox native libraries. + +## Building (with Velox Backend) + +### Build the Gluten Velox backend package + +Clone the Gluten project and build it with the script `buildbundle-veloxbe.sh`; the resulting packages are placed in `/path/to/gluten/package/target/`: +```bash +git clone https://github.com/oap-project/gluten.git +cd /path/to/gluten + +## The script builds two jars for Spark 3.2.x, 3.3.x, and 3.4.x. +./dev/buildbundle-veloxbe.sh +``` + +## Usage + +You can use Gluten to accelerate Spark with the following steps. + +### Installing + +Add the Gluten jar: `cp /path/to/gluten/package/target/gluten-velox-bundle-spark3.x_2.12-*.jar $SPARK_HOME/jars/`, or specify it via the `spark.jars` configuration. + +### Configure + +Add the following configurations to `spark-defaults.conf`: +```properties +spark.plugins=io.glutenproject.GlutenPlugin +spark.memory.offHeap.size=20g +spark.memory.offHeap.enabled=true +spark.shuffle.manager=org.apache.spark.shuffle.sort.ColumnarShuffleManager +``` diff --git a/extensions/spark/kyuubi-spark-connector-tpcds/pom.xml b/extensions/spark/kyuubi-spark-connector-tpcds/pom.xml index 5999b8c6304..b7fa4cb5ac4 100644 --- a/extensions/spark/kyuubi-spark-connector-tpcds/pom.xml +++ b/extensions/spark/kyuubi-spark-connector-tpcds/pom.xml @@ -213,4 +213,29 @@ target/scala-${scala.binary.version}/classes target/scala-${scala.binary.version}/test-classes + + + + gluten + + org.apache.kyuubi.tags.GlutenTest + 3.4.1 + 3.4 + + + + io.glutenproject + gluten-velox-bundle-spark3.4_2.12-ubuntu_22.04 + 1.1.0-SNAPSHOT + system + ${project.basedir}/../../../gluten/package/target/gluten-velox-bundle-spark3.4_2.12-ubuntu_22.04-1.1.0-SNAPSHOT.jar + + + org.apache.spark + spark-hive_${scala.binary.version} + test + + + + diff --git a/extensions/spark/kyuubi-spark-connector-tpcds/src/main/resources/kyuubi/load-tpcds-tiny.sql b/extensions/spark/kyuubi-spark-connector-tpcds/src/main/resources/kyuubi/load-tpcds-tiny.sql new file mode 100644 index 00000000000..952a9cf3a37 --- /dev/null +++ b/extensions/spark/kyuubi-spark-connector-tpcds/src/main/resources/kyuubi/load-tpcds-tiny.sql @@ -0,0 +1,146 @@ +-- Licensed to the Apache Software Foundation (ASF) under one or more +-- contributor license agreements. See the NOTICE file distributed with +-- this work for additional information regarding copyright ownership. 
+-- The ASF licenses this file to You under the Apache License, Version 2.0 +-- (the "License"); you may not use this file except in compliance with +-- the License. You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, software +-- distributed under the License is distributed on an "AS IS" BASIS, +-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +-- See the License for the specific language governing permissions and +-- limitations under the License. +-- + +CREATE DATABASE IF NOT EXISTS spark_catalog.tpcds_tiny; + +USE spark_catalog.tpcds_tiny; + +-- +-- Name: catalog_sales; Type: TABLE; Tablespace: +-- +CREATE TABLE IF NOT EXISTS catalog_sales USING parquet PARTITIONED BY (cs_sold_date_sk) +AS SELECT * FROM tpcds.tiny.catalog_sales; + +-- +-- Name: catalog_returns; Type: TABLE; Tablespace: +-- +CREATE TABLE IF NOT EXISTS catalog_returns USING parquet PARTITIONED BY (cr_returned_date_sk) +AS SELECT * FROM tpcds.tiny.catalog_returns; + +-- +-- Name: inventory; Type: TABLE; Tablespace: +-- +CREATE TABLE IF NOT EXISTS inventory USING parquet PARTITIONED BY (inv_date_sk) +AS SELECT * FROM tpcds.tiny.inventory; + +-- +-- Name: store_sales; Type: TABLE; Tablespace: +-- +CREATE TABLE IF NOT EXISTS store_sales USING parquet PARTITIONED BY (ss_sold_date_sk) +AS SELECT * FROM tpcds.tiny.store_sales; + +-- +-- Name: store_returns; Type: TABLE; Tablespace: +-- +CREATE TABLE IF NOT EXISTS store_returns USING parquet PARTITIONED BY (sr_returned_date_sk) +AS SELECT * FROM tpcds.tiny.store_returns; + +-- +-- Name: web_sales; Type: TABLE; Tablespace: +-- +CREATE TABLE IF NOT EXISTS web_sales USING parquet PARTITIONED BY (ws_sold_date_sk) +AS SELECT * FROM tpcds.tiny.web_sales; + +-- +-- Name: web_returns; Type: TABLE; Tablespace: +-- +CREATE TABLE IF NOT EXISTS web_returns USING parquet PARTITIONED BY (wr_returned_date_sk) +AS SELECT * FROM tpcds.tiny.web_returns; + +-- +-- Name: call_center; Type: TABLE; Tablespace: +-- +CREATE TABLE IF NOT EXISTS call_center USING parquet AS SELECT * FROM tpcds.tiny.call_center; + +-- +-- Name: catalog_page; Type: TABLE; Tablespace: +-- +CREATE TABLE IF NOT EXISTS catalog_page USING parquet AS SELECT * FROM tpcds.tiny.catalog_page; + +-- +-- Name: customer; Type: TABLE; Tablespace: +-- +CREATE TABLE IF NOT EXISTS customer USING parquet AS SELECT * FROM tpcds.tiny.customer; + +-- +-- Name: customer_address; Type: TABLE; Tablespace: +-- +CREATE TABLE IF NOT EXISTS customer_address USING parquet AS SELECT * FROM tpcds.tiny.customer_address; + +-- +-- Name: customer_demographics; Type: TABLE; Tablespace: +-- +CREATE TABLE IF NOT EXISTS customer_demographics USING parquet AS SELECT * FROM tpcds.tiny.customer_demographics; + +-- +-- Name: date_dim; Type: TABLE; Tablespace: +-- +CREATE TABLE IF NOT EXISTS date_dim USING parquet AS SELECT * FROM tpcds.tiny.date_dim; + +-- +-- Name: household_demographics; Type: TABLE; Tablespace: +-- +CREATE TABLE IF NOT EXISTS household_demographics USING parquet AS SELECT * FROM tpcds.tiny.household_demographics; + +-- +-- Name: income_band; Type: TABLE; Tablespace: +-- +CREATE TABLE IF NOT EXISTS income_band USING parquet AS SELECT * FROM tpcds.tiny.income_band; + +-- +-- Name: item; Type: TABLE; Tablespace: +-- +CREATE TABLE IF NOT EXISTS item USING parquet AS SELECT * FROM tpcds.tiny.item; + +-- +-- Name: promotion; Type: TABLE; Tablespace: +-- +CREATE TABLE IF NOT EXISTS promotion USING parquet AS SELECT * 
FROM tpcds.tiny.promotion; + +-- +-- Name: reason; Type: TABLE; Tablespace: +-- +CREATE TABLE IF NOT EXISTS reason USING parquet AS SELECT * FROM tpcds.tiny.reason; + +-- +-- Name: ship_mode; Type: TABLE; Tablespace: +-- +CREATE TABLE IF NOT EXISTS ship_mode USING parquet AS SELECT * FROM tpcds.tiny.ship_mode; + +-- +-- Name: store; Type: TABLE; Tablespace: +-- +CREATE TABLE IF NOT EXISTS store USING parquet AS SELECT * FROM tpcds.tiny.store; + +-- +-- Name: time_dim; Type: TABLE; Tablespace: +-- +CREATE TABLE IF NOT EXISTS time_dim USING parquet AS SELECT * FROM tpcds.tiny.time_dim; + +-- +-- Name: warehouse; Type: TABLE; Tablespace: +-- +CREATE TABLE IF NOT EXISTS warehouse USING parquet AS SELECT * FROM tpcds.tiny.warehouse; + +-- +-- Name: web_page; Type: TABLE; Tablespace: +-- +CREATE TABLE IF NOT EXISTS web_page USING parquet AS SELECT * FROM tpcds.tiny.web_page; + +-- +-- Name: web_site; Type: TABLE; Tablespace: +-- +CREATE TABLE IF NOT EXISTS web_site USING parquet AS SELECT * FROM tpcds.tiny.web_site; diff --git a/extensions/spark/kyuubi-spark-connector-tpcds/src/test/scala/org/apache/kyuubi/spark/connector/tpcds/TPCDSCatalogSuite.scala b/extensions/spark/kyuubi-spark-connector-tpcds/src/test/scala/org/apache/kyuubi/spark/connector/tpcds/TPCDSCatalogSuite.scala index 0eed970a4cd..413c54ad44f 100644 --- a/extensions/spark/kyuubi-spark-connector-tpcds/src/test/scala/org/apache/kyuubi/spark/connector/tpcds/TPCDSCatalogSuite.scala +++ b/extensions/spark/kyuubi-spark-connector-tpcds/src/test/scala/org/apache/kyuubi/spark/connector/tpcds/TPCDSCatalogSuite.scala @@ -27,16 +27,9 @@ import org.apache.kyuubi.KyuubiFunSuite import org.apache.kyuubi.spark.connector.common.LocalSparkSession.withSparkSession import org.apache.kyuubi.spark.connector.common.SparkUtils.SPARK_RUNTIME_VERSION -class TPCDSCatalogSuite extends KyuubiFunSuite { +class TPCDSCatalogSuite extends KyuubiFunSuite with TPCDSSuiteBase { test("get catalog name") { - val sparkConf = new SparkConf() - .setMaster("local[*]") - .set("spark.ui.enabled", "false") - .set("spark.sql.catalogImplementation", "in-memory") - .set("spark.sql.catalog.tpcds", classOf[TPCDSCatalog].getName) - .set("spark.sql.cbo.enabled", "true") - .set("spark.sql.cbo.planStats.enabled", "true") withSparkSession(SparkSession.builder.config(sparkConf).getOrCreate()) { _ => val catalog = new TPCDSCatalog val catalogName = "test" @@ -46,13 +39,6 @@ class TPCDSCatalogSuite extends KyuubiFunSuite { } test("supports namespaces") { - val sparkConf = new SparkConf() - .setMaster("local[*]") - .set("spark.ui.enabled", "false") - .set("spark.sql.catalogImplementation", "in-memory") - .set("spark.sql.catalog.tpcds", classOf[TPCDSCatalog].getName) - .set("spark.sql.cbo.enabled", "true") - .set("spark.sql.cbo.planStats.enabled", "true") withSparkSession(SparkSession.builder.config(sparkConf).getOrCreate()) { spark => spark.sql("USE tpcds") assert(spark.sql(s"SHOW DATABASES").collect().length == 11) @@ -65,12 +51,10 @@ class TPCDSCatalogSuite extends KyuubiFunSuite { "TINY,sf10" -> Seq("tiny", "sf10"), "sf1 , " -> Seq("sf1"), "none" -> Seq.empty[String]).foreach { case (confValue, expectedExcludeDatabases) => - val sparkConf = new SparkConf().setMaster("local[*]") - .set("spark.ui.enabled", "false") - .set("spark.sql.catalogImplementation", "in-memory") - .set("spark.sql.catalog.tpcds", classOf[TPCDSCatalog].getName) - .set("spark.sql.catalog.tpcds.excludeDatabases", confValue) - withSparkSession(SparkSession.builder.config(sparkConf).getOrCreate()) { spark => + val conf = 
sparkConf.set("spark.sql.catalog.tpcds.excludeDatabases", confValue) + .remove("spark.sql.cbo.enabled") + .remove("spark.sql.cbo.planStats.enabled") + withSparkSession(SparkSession.builder.config(conf).getOrCreate()) { spark => spark.sql("USE tpcds") assert( spark.sql(s"SHOW DATABASES").collect.map(_.getString(0)).sorted === @@ -80,13 +64,6 @@ class TPCDSCatalogSuite extends KyuubiFunSuite { } test("tpcds.sf1 stats") { - val sparkConf = new SparkConf() - .setMaster("local[*]") - .set("spark.ui.enabled", "false") - .set("spark.sql.catalogImplementation", "in-memory") - .set("spark.sql.catalog.tpcds", classOf[TPCDSCatalog].getName) - .set("spark.sql.cbo.enabled", "true") - .set("spark.sql.cbo.planStats.enabled", "true") withSparkSession(SparkSession.builder.config(sparkConf).getOrCreate()) { spark => def assertStats(tableName: String, sizeInBytes: BigInt, rowCount: BigInt): Unit = { val stats = spark.table(tableName).queryExecution.analyzed.stats @@ -125,13 +102,6 @@ class TPCDSCatalogSuite extends KyuubiFunSuite { } test("nonexistent table") { - val sparkConf = new SparkConf() - .setMaster("local[*]") - .set("spark.ui.enabled", "false") - .set("spark.sql.catalogImplementation", "in-memory") - .set("spark.sql.catalog.tpcds", classOf[TPCDSCatalog].getName) - .set("spark.sql.cbo.enabled", "true") - .set("spark.sql.cbo.planStats.enabled", "true") withSparkSession(SparkSession.builder.config(sparkConf).getOrCreate()) { spark => val exception = intercept[AnalysisException] { spark.table("tpcds.sf1.nonexistent_table") @@ -142,13 +112,6 @@ class TPCDSCatalogSuite extends KyuubiFunSuite { } test("tpcds.tiny count and checksum") { - val sparkConf = new SparkConf() - .setMaster("local[*]") - .set("spark.ui.enabled", "false") - .set("spark.sql.catalogImplementation", "in-memory") - .set("spark.sql.catalog.tpcds", classOf[TPCDSCatalog].getName) - .set("spark.sql.cbo.enabled", "true") - .set("spark.sql.cbo.planStats.enabled", "true") withSparkSession(SparkSession.builder.config(sparkConf).getOrCreate()) { spark => tableInfo.foreach { case (table, (expectCount, expectChecksum)) => @@ -205,4 +168,9 @@ class TPCDSCatalogSuite extends KyuubiFunSuite { ("tpcds.tiny.web_returns", ("1152", "2464383243098")), ("tpcds.tiny.web_sales", ("11876", "25458905770096")), ("tpcds.tiny.web_site", ("2", "3798438288"))) + + override def sparkConf: SparkConf = { + super.sparkConf.set("spark.sql.cbo.enabled", "true") + .set("spark.sql.cbo.planStats.enabled", "true") + } } diff --git a/extensions/spark/kyuubi-spark-connector-tpcds/src/test/scala/org/apache/kyuubi/spark/connector/tpcds/TPCDSQuerySuite.scala b/extensions/spark/kyuubi-spark-connector-tpcds/src/test/scala/org/apache/kyuubi/spark/connector/tpcds/TPCDSQuerySuite.scala index c99d7becafa..e44288bee25 100644 --- a/extensions/spark/kyuubi-spark-connector-tpcds/src/test/scala/org/apache/kyuubi/spark/connector/tpcds/TPCDSQuerySuite.scala +++ b/extensions/spark/kyuubi-spark-connector-tpcds/src/test/scala/org/apache/kyuubi/spark/connector/tpcds/TPCDSQuerySuite.scala @@ -40,7 +40,7 @@ import org.apache.kyuubi.spark.connector.common.LocalSparkSession.withSparkSessi * }}} */ @Slow -class TPCDSQuerySuite extends KyuubiFunSuite { +class TPCDSQuerySuite extends KyuubiFunSuite with TPCDSSuiteBase { val queries: Set[String] = (1 to 99).map(i => s"q$i").toSet - ("q14", "q23", "q24", "q39") + @@ -48,13 +48,8 @@ class TPCDSQuerySuite extends KyuubiFunSuite { test("run query on tiny") { val viewSuffix = "view" - val sparkConf = new SparkConf().setMaster("local[*]") - 
.set("spark.ui.enabled", "false") - .set("spark.sql.catalogImplementation", "in-memory") - .set("spark.sql.catalog.tpcds", classOf[TPCDSCatalog].getName) - .set("spark.sql.catalog.tpcds.useTableSchema_2_6", "true") withSparkSession(SparkSession.builder.config(sparkConf).getOrCreate()) { spark => - spark.sql("USE tpcds.tiny") + loadTPCDSTINY(spark) queries.map { queryName => val in = Utils.getContextOrKyuubiClassLoader .getResourceAsStream(s"kyuubi/tpcds_3.2/$queryName.sql") @@ -79,4 +74,12 @@ } } } + + override def sparkConf: SparkConf = { + super.sparkConf.set("spark.sql.catalog.tpcds.useTableSchema_2_6", "true") + } + + def loadTPCDSTINY(sc: SparkSession): Unit = { + sc.sql("USE tpcds.tiny") + } } diff --git a/extensions/spark/kyuubi-spark-connector-tpcds/src/test/scala/org/apache/kyuubi/spark/connector/tpcds/TPCDSSuiteBase.scala b/extensions/spark/kyuubi-spark-connector-tpcds/src/test/scala/org/apache/kyuubi/spark/connector/tpcds/TPCDSSuiteBase.scala new file mode 100644 index 00000000000..7bcb12a73a5 --- /dev/null +++ b/extensions/spark/kyuubi-spark-connector-tpcds/src/test/scala/org/apache/kyuubi/spark/connector/tpcds/TPCDSSuiteBase.scala @@ -0,0 +1,29 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.kyuubi.spark.connector.tpcds + +import org.apache.spark.SparkConf + +trait TPCDSSuiteBase { + def sparkConf: SparkConf = { + new SparkConf().setMaster("local[*]") + .set("spark.ui.enabled", "false") + .set("spark.sql.catalogImplementation", "in-memory") + .set("spark.sql.catalog.tpcds", classOf[TPCDSCatalog].getName) + } +} diff --git a/extensions/spark/kyuubi-spark-connector-tpcds/src/test/scala/org/apache/kyuubi/spark/connector/tpcds/TPCDSTableSuite.scala b/extensions/spark/kyuubi-spark-connector-tpcds/src/test/scala/org/apache/kyuubi/spark/connector/tpcds/TPCDSTableSuite.scala index f948ec4efd1..f93450f0bd6 100644 --- a/extensions/spark/kyuubi-spark-connector-tpcds/src/test/scala/org/apache/kyuubi/spark/connector/tpcds/TPCDSTableSuite.scala +++ b/extensions/spark/kyuubi-spark-connector-tpcds/src/test/scala/org/apache/kyuubi/spark/connector/tpcds/TPCDSTableSuite.scala @@ -19,7 +19,6 @@ package org.apache.kyuubi.spark.connector.tpcds import io.trino.tpcds.Table import io.trino.tpcds.generator.CallCenterGeneratorColumn -import org.apache.spark.SparkConf import org.apache.spark.sql.SparkSession import org.apache.spark.sql.execution.datasources.v2.BatchScanExec @@ -27,16 +26,12 @@ import org.apache.kyuubi.KyuubiFunSuite import org.apache.kyuubi.spark.connector.common.LocalSparkSession.withSparkSession import org.apache.kyuubi.spark.connector.tpcds.TPCDSConf._ -class TPCDSTableSuite extends KyuubiFunSuite { +class TPCDSTableSuite extends KyuubiFunSuite with TPCDSSuiteBase { test("useAnsiStringType (true, false)") { Seq(true, false).foreach(key => { - val sparkConf = new SparkConf().setMaster("local[*]") - .set("spark.ui.enabled", "false") - .set("spark.sql.catalogImplementation", "in-memory") - .set("spark.sql.catalog.tpcds", classOf[TPCDSCatalog].getName) - .set("spark.sql.catalog.tpcds.useAnsiStringType", key.toString) - withSparkSession(SparkSession.builder.config(sparkConf).getOrCreate()) { spark => + val conf = sparkConf.set("spark.sql.catalog.tpcds.useAnsiStringType", key.toString) + withSparkSession(SparkSession.builder.config(conf).getOrCreate()) { spark => val rows = spark.sql("desc tpcds.sf1.call_center").collect() rows.foreach(row => { val dataType = row.getString(1) @@ -63,10 +58,6 @@ class TPCDSTableSuite extends KyuubiFunSuite { test("test nullable column") { TPCDSSchemaUtils.BASE_TABLES.foreach { tpcdsTable => val tableName = tpcdsTable.getName - val sparkConf = new SparkConf().setMaster("local[*]") - .set("spark.ui.enabled", "false") - .set("spark.sql.catalogImplementation", "in-memory") - .set("spark.sql.catalog.tpcds", classOf[TPCDSCatalog].getName) withSparkSession(SparkSession.builder.config(sparkConf).getOrCreate()) { spark => val sparkTable = spark.table(s"tpcds.sf1.$tableName") var notNullBitMap = 0 @@ -125,14 +116,10 @@ class TPCDSTableSuite extends KyuubiFunSuite { test("test maxPartitionBytes") { val maxPartitionBytes: Long = 1 * 1024 * 1024L - val sparkConf = new SparkConf().setMaster("local[*]") - .set("spark.ui.enabled", "false") - .set("spark.sql.catalogImplementation", "in-memory") - .set("spark.sql.catalog.tpcds", classOf[TPCDSCatalog].getName) - .set( - s"$TPCDS_CONNECTOR_READ_CONF_PREFIX.$MAX_PARTITION_BYTES_CONF", - String.valueOf(maxPartitionBytes)) - withSparkSession(SparkSession.builder.config(sparkConf).getOrCreate()) { spark => + val conf = sparkConf.set( + s"$TPCDS_CONNECTOR_READ_CONF_PREFIX.$MAX_PARTITION_BYTES_CONF", + String.valueOf(maxPartitionBytes)) + withSparkSession(SparkSession.builder.config(conf).getOrCreate()) { 
spark => val tableName = "catalog_returns" val table = Table.getTable(tableName) val scale = 100 diff --git a/extensions/spark/kyuubi-spark-connector-tpcds/src/test/scala/org/apache/kyuubi/spark/connector/tpcds/gluten/GlutenTPCDSQuerySuite.scala b/extensions/spark/kyuubi-spark-connector-tpcds/src/test/scala/org/apache/kyuubi/spark/connector/tpcds/gluten/GlutenTPCDSQuerySuite.scala new file mode 100644 index 00000000000..609d2fbbc3a --- /dev/null +++ b/extensions/spark/kyuubi-spark-connector-tpcds/src/test/scala/org/apache/kyuubi/spark/connector/tpcds/gluten/GlutenTPCDSQuerySuite.scala @@ -0,0 +1,56 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kyuubi.spark.connector.tpcds.gluten + +import scala.io.{Codec, Source} + +import org.apache.spark.SparkConf +import org.apache.spark.sql.SparkSession +import org.scalatest.tags.Slow + +import org.apache.kyuubi.{GlutenSuiteMixin, Utils} +import org.apache.kyuubi.spark.connector.tpcds.TPCDSQuerySuite +import org.apache.kyuubi.tags.GlutenTest + +@Slow +@GlutenTest +class GlutenTPCDSQuerySuite extends TPCDSQuerySuite with GlutenSuiteMixin { + + // TODO: running the full TPC-DS suite takes over six hours, so only a subset is run + override val queries: Set[String] = (1 to 99).map(i => s"q$i").toSet - + ("q14", "q23", "q24", "q39") + + ("q14a", "q14b", "q23a", "q23b", "q24a", "q24b", "q39a", "q39b") - + // TODO: fix the Gluten TPC-DS query tests excluded below + ("q1", "q4", "q7", "q11", "q12", "q17", "q20", "q21", "q25", "q26", "q29", "q30", "q34", "q37", + "q39a", "q39b", "q40", "q43", "q46", "q49", "q56", "q58", "q59", "q60", "q68", "q73", "q74", + "q78", "q79", "q81", "q82", "q83", "q84", "q91", "q98") + override def sparkConf: SparkConf = { + val glutenConf = super.sparkConf + extraConfigs.foreach { case (k, v) => glutenConf.set(k, v) } + glutenConf + } + + override def loadTPCDSTINY(sc: SparkSession): Unit = { + val in = Utils.getContextOrKyuubiClassLoader.getResourceAsStream("kyuubi/load-tpcds-tiny.sql") + val queryContent: String = Source.fromInputStream(in)(Codec.UTF8).mkString + in.close() + queryContent.split(";\n").filterNot(_.trim.isEmpty).foreach { sql => + sc.sql(sql) + } + } +} diff --git a/extensions/spark/kyuubi-spark-connector-tpch/pom.xml b/extensions/spark/kyuubi-spark-connector-tpch/pom.xml index 22a5405a6a0..611214655fd 100644 --- a/extensions/spark/kyuubi-spark-connector-tpch/pom.xml +++ b/extensions/spark/kyuubi-spark-connector-tpch/pom.xml @@ -219,4 +219,29 @@ target/scala-${scala.binary.version}/classes target/scala-${scala.binary.version}/test-classes + + + + gluten + + org.apache.kyuubi.tags.GlutenTest + 3.4.1 + 3.4 + + + + io.glutenproject + gluten-velox-bundle-spark3.4_2.12-ubuntu_22.04 + 1.1.0-SNAPSHOT + system + 
${project.basedir}/../../../gluten/package/target/gluten-velox-bundle-spark3.4_2.12-ubuntu_22.04-1.1.0-SNAPSHOT.jar + + + org.apache.spark + spark-hive_2.12 + test + + + + diff --git a/extensions/spark/kyuubi-spark-connector-tpch/src/main/resources/kyuubi/load-tpch-tiny.sql b/extensions/spark/kyuubi-spark-connector-tpch/src/main/resources/kyuubi/load-tpch-tiny.sql new file mode 100644 index 00000000000..8f2228f549c --- /dev/null +++ b/extensions/spark/kyuubi-spark-connector-tpch/src/main/resources/kyuubi/load-tpch-tiny.sql @@ -0,0 +1,59 @@ +-- Licensed to the Apache Software Foundation (ASF) under one or more +-- contributor license agreements. See the NOTICE file distributed with +-- this work for additional information regarding copyright ownership. +-- The ASF licenses this file to You under the Apache License, Version 2.0 +-- (the "License"); you may not use this file except in compliance with +-- the License. You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, software +-- distributed under the License is distributed on an "AS IS" BASIS, +-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +-- See the License for the specific language governing permissions and +-- limitations under the License. +-- + +CREATE DATABASE IF NOT EXISTS spark_catalog.tpch_tiny; + +USE spark_catalog.tpch_tiny; + +-- +-- Name: customer; Type: TABLE; Tablespace: +-- +CREATE TABLE IF NOT EXISTS customer USING parquet AS SELECT * FROM tpch.tiny.customer; + +-- +-- Name: orders; Type: TABLE; Tablespace: +-- +CREATE TABLE IF NOT EXISTS orders USING parquet AS SELECT * FROM tpch.tiny.orders; + +-- +-- Name: lineitem; Type: TABLE; Tablespace: +-- +CREATE TABLE IF NOT EXISTS lineitem USING parquet AS SELECT * FROM tpch.tiny.lineitem; + +-- +-- Name: part; Type: TABLE; Tablespace: +-- +CREATE TABLE IF NOT EXISTS part USING parquet AS SELECT * FROM tpch.tiny.part; + +-- +-- Name: partsupp; Type: TABLE; Tablespace: +-- +CREATE TABLE IF NOT EXISTS partsupp USING parquet AS SELECT * FROM tpch.tiny.partsupp; + +-- +-- Name: supplier; Type: TABLE; Tablespace: +-- +CREATE TABLE IF NOT EXISTS supplier USING parquet AS SELECT * FROM tpch.tiny.supplier; + +-- +-- Name: nation; Type: TABLE; Tablespace: +-- +CREATE TABLE IF NOT EXISTS nation USING parquet AS SELECT * FROM tpch.tiny.nation; + +-- +-- Name: region; Type: TABLE; Tablespace: +-- +CREATE TABLE IF NOT EXISTS region USING parquet AS SELECT * FROM tpch.tiny.region; diff --git a/extensions/spark/kyuubi-spark-connector-tpch/src/test/scala/org/apache/kyuubi/spark/connector/tpch/TPCHCatalogSuite.scala b/extensions/spark/kyuubi-spark-connector-tpch/src/test/scala/org/apache/kyuubi/spark/connector/tpch/TPCHCatalogSuite.scala index 14415141e63..0469d385b5b 100644 --- a/extensions/spark/kyuubi-spark-connector-tpch/src/test/scala/org/apache/kyuubi/spark/connector/tpch/TPCHCatalogSuite.scala +++ b/extensions/spark/kyuubi-spark-connector-tpch/src/test/scala/org/apache/kyuubi/spark/connector/tpch/TPCHCatalogSuite.scala @@ -25,16 +25,9 @@ import org.apache.kyuubi.KyuubiFunSuite import org.apache.kyuubi.spark.connector.common.LocalSparkSession.withSparkSession import org.apache.kyuubi.spark.connector.common.SparkUtils.SPARK_RUNTIME_VERSION -class TPCHCatalogSuite extends KyuubiFunSuite { +class TPCHCatalogSuite extends KyuubiFunSuite with TPCHSuiteBase { test("get catalog name") { - val sparkConf = new SparkConf() - .setMaster("local[*]") - 
.set("spark.ui.enabled", "false") - .set("spark.sql.catalogImplementation", "in-memory") - .set("spark.sql.catalog.tpch", classOf[TPCHCatalog].getName) - .set("spark.sql.cbo.enabled", "true") - .set("spark.sql.cbo.planStats.enabled", "true") withSparkSession(SparkSession.builder.config(sparkConf).getOrCreate()) { _ => val catalog = new TPCHCatalog val catalogName = "test" @@ -44,13 +37,6 @@ class TPCHCatalogSuite extends KyuubiFunSuite { } test("supports namespaces") { - val sparkConf = new SparkConf() - .setMaster("local[*]") - .set("spark.ui.enabled", "false") - .set("spark.sql.catalogImplementation", "in-memory") - .set("spark.sql.catalog.tpch", classOf[TPCHCatalog].getName) - .set("spark.sql.cbo.enabled", "true") - .set("spark.sql.cbo.planStats.enabled", "true") withSparkSession(SparkSession.builder.config(sparkConf).getOrCreate()) { spark => spark.sql("USE tpch") assert(spark.sql(s"SHOW DATABASES").collect().length == 12) @@ -63,12 +49,10 @@ class TPCHCatalogSuite extends KyuubiFunSuite { "TINY,sf10" -> Seq("tiny", "sf10"), "sf1 , " -> Seq("sf1"), "none" -> Seq.empty[String]).foreach { case (confValue, expectedExcludeDatabases) => - val sparkConf = new SparkConf().setMaster("local[*]") - .set("spark.ui.enabled", "false") - .set("spark.sql.catalogImplementation", "in-memory") - .set("spark.sql.catalog.tpch", classOf[TPCHCatalog].getName) - .set("spark.sql.catalog.tpch.excludeDatabases", confValue) - withSparkSession(SparkSession.builder.config(sparkConf).getOrCreate()) { spark => + val conf = sparkConf.set("spark.sql.catalog.tpch.excludeDatabases", confValue) + .remove("spark.sql.cbo.enabled") + .remove("spark.sql.cbo.planStats.enabled") + withSparkSession(SparkSession.builder.config(conf).getOrCreate()) { spark => spark.sql("USE tpch") assert( spark.sql(s"SHOW DATABASES").collect.map(_.getString(0)).sorted === @@ -78,13 +62,6 @@ class TPCHCatalogSuite extends KyuubiFunSuite { } test("tpch.tiny count") { - val sparkConf = new SparkConf() - .setMaster("local[*]") - .set("spark.ui.enabled", "false") - .set("spark.sql.catalogImplementation", "in-memory") - .set("spark.sql.catalog.tpch", classOf[TPCHCatalog].getName) - .set("spark.sql.cbo.enabled", "true") - .set("spark.sql.cbo.planStats.enabled", "true") withSparkSession(SparkSession.builder.config(sparkConf).getOrCreate()) { spark => assert(spark.table("tpch.tiny.customer").count === 1500) assert(spark.table("tpch.tiny.orders").count === 15000) @@ -98,13 +75,6 @@ class TPCHCatalogSuite extends KyuubiFunSuite { } test("tpch.sf0 count") { - val sparkConf = new SparkConf() - .setMaster("local[*]") - .set("spark.ui.enabled", "false") - .set("spark.sql.catalogImplementation", "in-memory") - .set("spark.sql.catalog.tpch", classOf[TPCHCatalog].getName) - .set("spark.sql.cbo.enabled", "true") - .set("spark.sql.cbo.planStats.enabled", "true") withSparkSession(SparkSession.builder.config(sparkConf).getOrCreate()) { spark => assert(spark.table("tpch.sf0.customer").count === 0) assert(spark.table("tpch.sf0.orders").count === 0) @@ -118,13 +88,6 @@ class TPCHCatalogSuite extends KyuubiFunSuite { } test("tpch.sf1 stats") { - val sparkConf = new SparkConf() - .setMaster("local[*]") - .set("spark.ui.enabled", "false") - .set("spark.sql.catalogImplementation", "in-memory") - .set("spark.sql.catalog.tpch", classOf[TPCHCatalog].getName) - .set("spark.sql.cbo.enabled", "true") - .set("spark.sql.cbo.planStats.enabled", "true") withSparkSession(SparkSession.builder.config(sparkConf).getOrCreate()) { spark => def assertStats(tableName: String, sizeInBytes: 
BigInt, rowCount: BigInt): Unit = { val stats = spark.table(tableName).queryExecution.analyzed.stats @@ -147,13 +110,6 @@ class TPCHCatalogSuite extends KyuubiFunSuite { } test("nonexistent table") { - val sparkConf = new SparkConf() - .setMaster("local[*]") - .set("spark.ui.enabled", "false") - .set("spark.sql.catalogImplementation", "in-memory") - .set("spark.sql.catalog.tpch", classOf[TPCHCatalog].getName) - .set("spark.sql.cbo.enabled", "true") - .set("spark.sql.cbo.planStats.enabled", "true") withSparkSession(SparkSession.builder.config(sparkConf).getOrCreate()) { spark => val exception = intercept[AnalysisException] { spark.table("tpch.sf1.nonexistent_table") @@ -162,4 +118,9 @@ class TPCHCatalogSuite extends KyuubiFunSuite { || exception.message.contains("TABLE_OR_VIEW_NOT_FOUND")) } } + + override def sparkConf: SparkConf = { + super.sparkConf.set("spark.sql.cbo.enabled", "true") + .set("spark.sql.cbo.planStats.enabled", "true") + } } diff --git a/extensions/spark/kyuubi-spark-connector-tpch/src/test/scala/org/apache/kyuubi/spark/connector/tpch/TPCHQuerySuite.scala b/extensions/spark/kyuubi-spark-connector-tpch/src/test/scala/org/apache/kyuubi/spark/connector/tpch/TPCHQuerySuite.scala index c651d930043..3353f5d43c7 100644 --- a/extensions/spark/kyuubi-spark-connector-tpch/src/test/scala/org/apache/kyuubi/spark/connector/tpch/TPCHQuerySuite.scala +++ b/extensions/spark/kyuubi-spark-connector-tpch/src/test/scala/org/apache/kyuubi/spark/connector/tpch/TPCHQuerySuite.scala @@ -20,7 +20,6 @@ package org.apache.kyuubi.spark.connector.tpch import scala.collection.JavaConverters._ import scala.io.{Codec, Source} -import org.apache.spark.SparkConf import org.apache.spark.sql.SparkSession import org.scalatest.tags.Slow @@ -40,18 +39,14 @@ import org.apache.kyuubi.spark.connector.common.LocalSparkSession.withSparkSessi * }}} */ @Slow -class TPCHQuerySuite extends KyuubiFunSuite { +class TPCHQuerySuite extends KyuubiFunSuite with TPCHSuiteBase { - val queries: List[String] = (1 to 22).map(i => s"q$i").toList + val queries: Set[String] = (1 to 22).map(i => s"q$i").toSet test("run query on tiny") { val viewSuffix = "view" - val sparkConf = new SparkConf().setMaster("local[*]") - .set("spark.ui.enabled", "false") - .set("spark.sql.catalogImplementation", "in-memory") - .set("spark.sql.catalog.tpch", classOf[TPCHCatalog].getName) withSparkSession(SparkSession.builder.config(sparkConf).getOrCreate()) { spark => - spark.sql("USE tpch.tiny") + loadTPCHTINY(spark) queries.map { queryName => val in = Utils.getContextOrKyuubiClassLoader.getResourceAsStream( s"kyuubi/tpch/$queryName.sql") @@ -71,4 +66,8 @@ class TPCHQuerySuite extends KyuubiFunSuite { } } } + + def loadTPCHTINY(sc: SparkSession): Unit = { + sc.sql("USE tpch.tiny") + } } diff --git a/extensions/spark/kyuubi-spark-connector-tpch/src/test/scala/org/apache/kyuubi/spark/connector/tpch/TPCHSuiteBase.scala b/extensions/spark/kyuubi-spark-connector-tpch/src/test/scala/org/apache/kyuubi/spark/connector/tpch/TPCHSuiteBase.scala new file mode 100644 index 00000000000..4ec24923488 --- /dev/null +++ b/extensions/spark/kyuubi-spark-connector-tpch/src/test/scala/org/apache/kyuubi/spark/connector/tpch/TPCHSuiteBase.scala @@ -0,0 +1,29 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kyuubi.spark.connector.tpch + +import org.apache.spark.SparkConf + +trait TPCHSuiteBase { + def sparkConf: SparkConf = { + new SparkConf().setMaster("local[*]") + .set("spark.ui.enabled", "false") + .set("spark.sql.catalogImplementation", "in-memory") + .set("spark.sql.catalog.tpch", classOf[TPCHCatalog].getName) + } +} diff --git a/extensions/spark/kyuubi-spark-connector-tpch/src/test/scala/org/apache/kyuubi/spark/connector/tpch/gluten/GlutenTPCHQuerySuite.scala b/extensions/spark/kyuubi-spark-connector-tpch/src/test/scala/org/apache/kyuubi/spark/connector/tpch/gluten/GlutenTPCHQuerySuite.scala new file mode 100644 index 00000000000..aae00efd828 --- /dev/null +++ b/extensions/spark/kyuubi-spark-connector-tpch/src/test/scala/org/apache/kyuubi/spark/connector/tpch/gluten/GlutenTPCHQuerySuite.scala @@ -0,0 +1,51 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kyuubi.spark.connector.tpch.gluten + +import scala.io.{Codec, Source} + +import org.apache.spark.SparkConf +import org.apache.spark.sql.SparkSession +import org.scalatest.tags.Slow + +import org.apache.kyuubi.{GlutenSuiteMixin, Utils} +import org.apache.kyuubi.spark.connector.tpch.TPCHQuerySuite +import org.apache.kyuubi.tags.GlutenTest + +@Slow +@GlutenTest +class GlutenTPCHQuerySuite extends TPCHQuerySuite with GlutenSuiteMixin { + // TODO: Fix the inconsistency in q9 results. 
+ override val queries: Set[String] = (1 to 22).map(i => s"q$i").toSet - "q9" + + override def sparkConf: SparkConf = { + val glutenConf = super.sparkConf + extraConfigs.foreach { case (k, v) => glutenConf.set(k, v) } + glutenConf + } + + override def loadTPCHTINY(sc: SparkSession): Unit = { + val in = Utils.getContextOrKyuubiClassLoader.getResourceAsStream("kyuubi/load-tpch-tiny.sql") + val queryContent: String = Source.fromInputStream(in)(Codec.UTF8).mkString + in.close() + queryContent.split(";\n").filterNot(_.trim.isEmpty).foreach { sql => + sc.sql(sql) + } + } + +} diff --git a/kyuubi-common/src/test/scala/org/apache/kyuubi/GlutenSuiteMixin.scala b/kyuubi-common/src/test/scala/org/apache/kyuubi/GlutenSuiteMixin.scala new file mode 100644 index 00000000000..6c17c5b9392 --- /dev/null +++ b/kyuubi-common/src/test/scala/org/apache/kyuubi/GlutenSuiteMixin.scala @@ -0,0 +1,33 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kyuubi + +trait GlutenSuiteMixin { + protected def extraJars: String = { + System.getProperty("java.class.path") + .split(":") + .filter(_.contains("gluten-velox-bundle-spark")).head + } + + protected def extraConfigs = Map( + "spark.plugins" -> "io.glutenproject.GlutenPlugin", + "spark.memory.offHeap.size" -> "20g", + "spark.memory.offHeap.enabled" -> "true", + "spark.shuffle.manager" -> "org.apache.spark.shuffle.sort.ColumnarShuffleManager", + "spark.jars" -> extraJars) +} diff --git a/kyuubi-util-scala/src/test/java/org/apache/kyuubi/tags/GlutenTest.java b/kyuubi-util-scala/src/test/java/org/apache/kyuubi/tags/GlutenTest.java new file mode 100644 index 00000000000..8620df4b95a --- /dev/null +++ b/kyuubi-util-scala/src/test/java/org/apache/kyuubi/tags/GlutenTest.java @@ -0,0 +1,29 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.kyuubi.tags; + +import java.lang.annotation.ElementType; +import java.lang.annotation.Retention; +import java.lang.annotation.RetentionPolicy; +import java.lang.annotation.Target; +import org.scalatest.TagAnnotation; + +@TagAnnotation +@Retention(RetentionPolicy.RUNTIME) +@Target({ElementType.METHOD, ElementType.TYPE}) +public @interface GlutenTest {} diff --git a/pom.xml b/pom.xml index b7ba11018e1..b25370bd66b 100644 --- a/pom.xml +++ b/pom.xml @@ -236,7 +236,7 @@ 1.12.1 4.8.0 2.2.0 - org.scalatest.tags.Slow,org.apache.kyuubi.tags.IcebergTest,org.apache.kyuubi.tags.DeltaTest,org.apache.kyuubi.tags.HudiTest + org.scalatest.tags.Slow,org.apache.kyuubi.tags.GlutenTest,org.apache.kyuubi.tags.IcebergTest,org.apache.kyuubi.tags.DeltaTest,org.apache.kyuubi.tags.HudiTest false 2.30.0 @@ -2231,7 +2231,7 @@ 1.3.1 spark-${spark.version}-bin-hadoop3.2.tgz - org.scalatest.tags.Slow + org.scalatest.tags.Slow,org.apache.kyuubi.tags.GlutenTest @@ -2247,7 +2247,7 @@ delta-core 2.0.2 spark-${spark.version}-bin-hadoop3.2${spark.archive.scala.suffix}.tgz - org.scalatest.tags.Slow + org.scalatest.tags.Slow,org.apache.kyuubi.tags.GlutenTest @@ -2263,7 +2263,7 @@ delta-core 3.3.3 3.3 - org.scalatest.tags.Slow + org.scalatest.tags.Slow,org.apache.kyuubi.tags.GlutenTest @@ -2278,7 +2278,7 @@ 2.4.0 3.4.1 3.4 - org.scalatest.tags.Slow + org.scalatest.tags.Slow,org.apache.kyuubi.tags.GlutenTest @@ -2296,7 +2296,7 @@ 3.4 3.5.0 3.5 - org.scalatest.tags.Slow,org.apache.kyuubi.tags.DeltaTest,org.apache.kyuubi.tags.PySparkTest,org.apache.kyuubi.tags.PaimonTest + org.scalatest.tags.Slow,org.apache.kyuubi.tags.GlutenTest,org.apache.kyuubi.tags.DeltaTest,org.apache.kyuubi.tags.PySparkTest,org.apache.kyuubi.tags.PaimonTest @@ -2304,7 +2304,7 @@ spark-master 4.0.0-SNAPSHOT - org.scalatest.tags.Slow,org.apache.kyuubi.tags.DeltaTest,org.apache.kyuubi.tags.IcebergTest,org.apache.kyuubi.tags.DeltaTest,org.apache.kyuubi.tags.HudiTest,org.apache.kyuubi.tags.PySparkTest + org.scalatest.tags.Slow,org.apache.kyuubi.tags.GlutenTest,org.apache.kyuubi.tags.DeltaTest,org.apache.kyuubi.tags.IcebergTest,org.apache.kyuubi.tags.DeltaTest,org.apache.kyuubi.tags.HudiTest,org.apache.kyuubi.tags.PySparkTest
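
For reference, the sketch below strings together the commands that the `gluten-build` and `gluten-test` jobs above execute, so the `GlutenTest`-tagged suites can be exercised outside CI. It assumes an Ubuntu 22.04 host with the apt dependencies from the workflow installed and JDK 8 plus Maven available; the workflow's `MVN_OPT` flags are omitted for brevity, and `TEST_MODULES` can be either value from the job matrix.

```bash
# Build the Gluten Velox bundle; the jar lands in gluten/package/target/,
# which is where the connector poms' "gluten" profile resolves its systemPath.
git clone https://github.com/oap-project/gluten.git
(cd gluten && ./dev/buildbundle-veloxbe.sh)

# Build one connector module with the gluten + spark-3.4 profiles, then run
# only the tests tagged org.apache.kyuubi.tags.GlutenTest, mirroring the
# "Run TPC-H/DS Test" step of the workflow.
TEST_MODULES=extensions/spark/kyuubi-spark-connector-tpch
./build/mvn -pl ${TEST_MODULES} -am clean install -DskipTests -Pgluten -Pspark-3.4
./build/mvn -pl ${TEST_MODULES} -am -Pgluten -Pspark-3.4 test \
  -Dmaven.plugin.scalatest.exclude.tags='' \
  -Dtest=none -Dmaven.plugin.scalatest.include.tags='org.apache.kyuubi.tags.GlutenTest'
```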
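Similarly, after installing the bundle jar as `docs/deployment/spark/gluten.md` describes, the quickest sanity check is to inspect a physical plan. This is only a sketch, assuming the bundle jar is already in `$SPARK_HOME/jars/` (or passed via `spark.jars`); the exact operator names in the output vary by Gluten version.

```bash
# Launch spark-sql with the same settings the docs add to spark-defaults.conf
# and print a plan; with the plugin active, offloaded operators typically show
# up as Gluten/Velox nodes (e.g. names ending in "Transformer") rather than
# the vanilla Spark operators.
$SPARK_HOME/bin/spark-sql \
  --conf spark.plugins=io.glutenproject.GlutenPlugin \
  --conf spark.memory.offHeap.enabled=true \
  --conf spark.memory.offHeap.size=20g \
  --conf spark.shuffle.manager=org.apache.spark.shuffle.sort.ColumnarShuffleManager \
  -e "EXPLAIN SELECT sum(id) FROM range(10000)"
```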