diff --git a/.github/workflows/gluten.yml b/.github/workflows/gluten.yml
new file mode 100644
index 00000000000..38312915432
--- /dev/null
+++ b/.github/workflows/gluten.yml
@@ -0,0 +1,122 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+name: Gluten CI
+
+on:
+ schedule:
+ - cron: '0 4 * * *'
+
+env:
+ MVN_OPT: -Dmaven.javadoc.skip=true -Drat.skip=true -Dscalastyle.skip=true -Dspotless.check.skip -Dorg.slf4j.simpleLogger.defaultLogLevel=warn -Pjdbc-shaded,gen-policy -Dmaven.plugin.download.cache.path=/tmp/engine-archives
+
+jobs:
+ gluten-build:
+ name: Build Gluten
+ runs-on: ubuntu-22.04
+ steps:
+ - uses: actions/checkout@v4
+ - name: Tune Runner VM
+ uses: ./.github/actions/tune-runner-vm
+ - name: Update and Upgrade
+ run: sudo apt-get update && sudo apt-get upgrade -y
+ - name: Install dependencies
+ run: |
+ sudo apt-get install -y software-properties-common
+ sudo apt-get install -y libunwind-dev build-essential cmake libssl-dev libre2-dev libcurl4-openssl-dev clang lldb lld libz-dev git ninja-build uuid-dev
+ - name: Setup JDK 8
+ uses: actions/setup-java@v3
+ with:
+ distribution: temurin
+ java-version: 8
+ cache: 'maven'
+ check-latest: false
+ - name: Setup Maven
+ uses: ./.github/actions/setup-maven
+ - name: Get gluten cache date
+ id: date
+ run: echo "date=$(date +'%Y-%m-%d')" >> $GITHUB_OUTPUT
+ - name: Check gluten cache
+ id: gluten-cache
+ uses: actions/cache@v3
+ with:
+ path: gluten/package/target/
+ key: gluten_package_${{ steps.date.outputs.date }}
+ - name: Build gluten project
+ run: |
+ if [[ "${{ steps.gluten-cache.outputs.cache-hit }}" != 'true' ]]; then
+ git clone https://github.com/oap-project/gluten.git
+ cd gluten
+ ./dev/buildbundle-veloxbe.sh
+ fi
+ - uses: actions/cache@v3
+ if: steps.gluten-cache.outputs.cache-hit != 'true'
+ with:
+ path: gluten/package/target/
+ key: gluten_package_${{ steps.date.outputs.date }}
+
+ gluten-test:
+ name: Gluten TPC-H/DS Test
+ needs: gluten-build
+ runs-on: ubuntu-22.04
+ strategy:
+ fail-fast: false
+ matrix:
+ module: [ "extensions/spark/kyuubi-spark-connector-tpcds", "extensions/spark/kyuubi-spark-connector-tpch" ]
+ steps:
+ - uses: actions/checkout@v4
+ - name: Tune Runner VM
+ uses: ./.github/actions/tune-runner-vm
+ - name: Update and Upgrade
+ run: sudo apt-get update && sudo apt-get upgrade -y
+ - name: Install dependencies
+ run: |
+ sudo apt-get install -y software-properties-common
+ sudo apt-get install -y libunwind-dev build-essential cmake libssl-dev libre2-dev libcurl4-openssl-dev clang lldb lld libz-dev git ninja-build uuid-dev
+ sudo apt-get install -y libsnappy-dev libthrift-dev libboost-all-dev libgflags-dev libgoogle-glog-dev
+ - name: Cache Engine Archives
+ uses: ./.github/actions/cache-engine-archives
+ - name: Get gluten cache date
+ id: date
+ run: echo "date=$(date +'%Y-%m-%d')" >> $GITHUB_OUTPUT
+ - name: Check gluten cache
+ id: gluten-cache
+ uses: actions/cache@v3
+ with:
+ path: gluten/package/target/
+ key: gluten_package_${{ steps.date.outputs.date }}
+ - name: Cache Gluten Package
+ uses: actions/cache@v3
+ with:
+ path: gluten/package/target/
+ key: gluten_package
+ - name: Setup JDK 8
+ uses: actions/setup-java@v3
+ with:
+ distribution: temurin
+ java-version: 8
+ cache: 'maven'
+ check-latest: false
+ - name: Setup Maven
+ uses: ./.github/actions/setup-maven
+ - name: Run TPC-H/DS Test
+ run: |
+ TEST_MODULES=${{ matrix.module }}
+ ./build/mvn ${MVN_OPT} -pl ${TEST_MODULES} -am clean install -DskipTests -Pgluten -Pspark-3.4
+ ./build/mvn ${MVN_OPT} -pl ${TEST_MODULES} -am -Pgluten -Pspark-3.4 test \
+ -Dmaven.plugin.scalatest.exclude.tags='' \
+ -Dtest=none -Dmaven.plugin.scalatest.include.tags='org.apache.kyuubi.tags.GlutenTest'
diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml
index 6c12700d831..4608ec45e27 100644
--- a/.github/workflows/master.yml
+++ b/.github/workflows/master.yml
@@ -60,22 +60,22 @@ jobs:
- java: 8
spark: '3.4'
spark-archive: '-Dspark.archive.mirror=https://archive.apache.org/dist/spark/spark-3.1.3 -Dspark.archive.name=spark-3.1.3-bin-hadoop3.2.tgz -Pzookeeper-3.6'
- exclude-tags: '-Dmaven.plugin.scalatest.exclude.tags=org.scalatest.tags.Slow,org.apache.kyuubi.tags.DeltaTest,org.apache.kyuubi.tags.IcebergTest,org.apache.kyuubi.tags.DeltaTest,org.apache.kyuubi.tags.HudiTest,org.apache.kyuubi.tags.SparkLocalClusterTest'
+ exclude-tags: '-Dmaven.plugin.scalatest.exclude.tags=org.scalatest.tags.Slow,org.apache.kyuubi.tags.GlutenTest,org.apache.kyuubi.tags.DeltaTest,org.apache.kyuubi.tags.IcebergTest,org.apache.kyuubi.tags.HudiTest,org.apache.kyuubi.tags.SparkLocalClusterTest'
comment: 'verify-on-spark-3.1-binary'
- java: 8
spark: '3.4'
spark-archive: '-Dspark.archive.mirror=https://archive.apache.org/dist/spark/spark-3.2.4 -Dspark.archive.name=spark-3.2.4-bin-hadoop3.2.tgz -Pzookeeper-3.6'
- exclude-tags: '-Dmaven.plugin.scalatest.exclude.tags=org.scalatest.tags.Slow,org.apache.kyuubi.tags.DeltaTest,org.apache.kyuubi.tags.IcebergTest,org.apache.kyuubi.tags.DeltaTest,org.apache.kyuubi.tags.HudiTest,org.apache.kyuubi.tags.SparkLocalClusterTest'
+ exclude-tags: '-Dmaven.plugin.scalatest.exclude.tags=org.scalatest.tags.Slow,org.apache.kyuubi.tags.GlutenTest,org.apache.kyuubi.tags.DeltaTest,org.apache.kyuubi.tags.IcebergTest,org.apache.kyuubi.tags.HudiTest,org.apache.kyuubi.tags.SparkLocalClusterTest'
comment: 'verify-on-spark-3.2-binary'
- java: 8
spark: '3.4'
spark-archive: '-Dspark.archive.mirror=https://archive.apache.org/dist/spark/spark-3.3.3 -Dspark.archive.name=spark-3.3.3-bin-hadoop3.tgz -Pzookeeper-3.6'
- exclude-tags: '-Dmaven.plugin.scalatest.exclude.tags=org.scalatest.tags.Slow,org.apache.kyuubi.tags.DeltaTest,org.apache.kyuubi.tags.IcebergTest,org.apache.kyuubi.tags.DeltaTest,org.apache.kyuubi.tags.HudiTest,org.apache.kyuubi.tags.SparkLocalClusterTest'
+ exclude-tags: '-Dmaven.plugin.scalatest.exclude.tags=org.scalatest.tags.Slow,org.apache.kyuubi.tags.GlutenTest,org.apache.kyuubi.tags.DeltaTest,org.apache.kyuubi.tags.IcebergTest,org.apache.kyuubi.tags.HudiTest,org.apache.kyuubi.tags.SparkLocalClusterTest'
comment: 'verify-on-spark-3.3-binary'
- java: 8
spark: '3.4'
spark-archive: '-Dspark.archive.mirror=https://archive.apache.org/dist/spark/spark-3.5.0 -Dspark.archive.name=spark-3.5.0-bin-hadoop3.tgz -Pzookeeper-3.6'
- exclude-tags: '-Dmaven.plugin.scalatest.exclude.tags=org.scalatest.tags.Slow,org.apache.kyuubi.tags.DeltaTest,org.apache.kyuubi.tags.IcebergTest,org.apache.kyuubi.tags.PaimonTest,org.apache.kyuubi.tags.SparkLocalClusterTest'
+ exclude-tags: '-Dmaven.plugin.scalatest.exclude.tags=org.scalatest.tags.Slow,org.apache.kyuubi.tags.GlutenTest,org.apache.kyuubi.tags.DeltaTest,org.apache.kyuubi.tags.IcebergTest,org.apache.kyuubi.tags.PaimonTest,org.apache.kyuubi.tags.SparkLocalClusterTest'
comment: 'verify-on-spark-3.5-binary'
exclude:
# SPARK-33772: Spark supports JDK 17 since 3.3.0
diff --git a/docs/deployment/spark/gluten.md b/docs/deployment/spark/gluten.md
new file mode 100644
index 00000000000..76e21290765
--- /dev/null
+++ b/docs/deployment/spark/gluten.md
@@ -0,0 +1,52 @@
+<!--
+- Licensed to the Apache Software Foundation (ASF) under one or more
+- contributor license agreements. See the NOTICE file distributed with
+- this work for additional information regarding copyright ownership.
+- The ASF licenses this file to You under the Apache License, Version 2.0
+- (the "License"); you may not use this file except in compliance with
+- the License. You may obtain a copy of the License at
+-
+-   http://www.apache.org/licenses/LICENSE-2.0
+-
+- Unless required by applicable law or agreed to in writing, software
+- distributed under the License is distributed on an "AS IS" BASIS,
+- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+- See the License for the specific language governing permissions and
+- limitations under the License.
+-->
+
+# Gluten
+
+Gluten is a Spark plugin developed by Intel, designed to accelerate Apache Spark with native libraries. Currently, only CentOS 7/8 and Ubuntu 20.04/22.04, along with Spark 3.2/3.3/3.4, are supported. The following steps show how to use Gluten with the Velox native libraries.
+
+## Building (with Velox Backend)
+
+### Build the Gluten Velox backend package
+
+Clone the Gluten project and build it with the bundled script `buildbundle-veloxbe.sh`; the resulting package is placed in `/path/to/gluten/package/target/`:
+```bash
+git clone https://github.com/oap-project/gluten.git
+cd /path/to/gluten
+
+# The script builds two jars for Spark 3.2.x, 3.3.x, and 3.4.x.
+./dev/buildbundle-veloxbe.sh
+```
+
+## Usage
+
+You can use Gluten to accelerate Spark with the following steps.
+
+### Installing
+
+Add the Gluten jar to Spark: copy `/path/to/gluten/package/target/gluten-velox-bundle-spark3.x_2.12-*.jar` into `$SPARK_HOME/jars/`, or reference it through the `spark.jars` configuration.
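+
+For example, a minimal sketch assuming a Spark 3.4 build and the default build output path:
+
+```bash
+# copy the bundle jar built above onto Spark's classpath
+cp /path/to/gluten/package/target/gluten-velox-bundle-spark3.4_2.12-*.jar $SPARK_HOME/jars/
+```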
+
+### Configure
+
+Add the following configuration to `spark-defaults.conf`:
+```properties
+spark.plugins=io.glutenproject.GlutenPlugin
+spark.memory.offHeap.size=20g
+spark.memory.offHeap.enabled=true
+spark.shuffle.manager=org.apache.spark.shuffle.sort.ColumnarShuffleManager
+```
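+
+The same settings can also be supplied per session. A minimal sketch using `spark-shell`, assuming the bundle jar is passed via `--jars` instead of being copied into `$SPARK_HOME/jars/`:
+
+```bash
+$SPARK_HOME/bin/spark-shell \
+  --jars /path/to/gluten/package/target/gluten-velox-bundle-spark3.4_2.12-*.jar \
+  --conf spark.plugins=io.glutenproject.GlutenPlugin \
+  --conf spark.memory.offHeap.enabled=true \
+  --conf spark.memory.offHeap.size=20g \
+  --conf spark.shuffle.manager=org.apache.spark.shuffle.sort.ColumnarShuffleManager
+```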
diff --git a/extensions/spark/kyuubi-spark-connector-tpcds/pom.xml b/extensions/spark/kyuubi-spark-connector-tpcds/pom.xml
index 5999b8c6304..b7fa4cb5ac4 100644
--- a/extensions/spark/kyuubi-spark-connector-tpcds/pom.xml
+++ b/extensions/spark/kyuubi-spark-connector-tpcds/pom.xml
@@ -213,4 +213,29 @@
         <outputDirectory>target/scala-${scala.binary.version}/classes</outputDirectory>
         <testOutputDirectory>target/scala-${scala.binary.version}/test-classes</testOutputDirectory>
     </build>
+
+    <profiles>
+        <profile>
+            <id>gluten</id>
+            <properties>
+                <maven.plugin.scalatest.include.tags>org.apache.kyuubi.tags.GlutenTest</maven.plugin.scalatest.include.tags>
+                <spark.version>3.4.1</spark.version>
+                <spark.binary.version>3.4</spark.binary.version>
+            </properties>
+            <dependencies>
+                <dependency>
+                    <groupId>io.glutenproject</groupId>
+                    <artifactId>gluten-velox-bundle-spark3.4_2.12-ubuntu_22.04</artifactId>
+                    <version>1.1.0-SNAPSHOT</version>
+                    <scope>system</scope>
+                    <systemPath>${project.basedir}/../../../gluten/package/target/gluten-velox-bundle-spark3.4_2.12-ubuntu_22.04-1.1.0-SNAPSHOT.jar</systemPath>
+                </dependency>
+                <dependency>
+                    <groupId>org.apache.spark</groupId>
+                    <artifactId>spark-hive_${scala.binary.version}</artifactId>
+                    <scope>test</scope>
+                </dependency>
+            </dependencies>
+        </profile>
+    </profiles>
 </project>
diff --git a/extensions/spark/kyuubi-spark-connector-tpcds/src/main/resources/kyuubi/load-tpcds-tiny.sql b/extensions/spark/kyuubi-spark-connector-tpcds/src/main/resources/kyuubi/load-tpcds-tiny.sql
new file mode 100644
index 00000000000..952a9cf3a37
--- /dev/null
+++ b/extensions/spark/kyuubi-spark-connector-tpcds/src/main/resources/kyuubi/load-tpcds-tiny.sql
@@ -0,0 +1,146 @@
+-- Licensed to the Apache Software Foundation (ASF) under one or more
+-- contributor license agreements. See the NOTICE file distributed with
+-- this work for additional information regarding copyright ownership.
+-- The ASF licenses this file to You under the Apache License, Version 2.0
+-- (the "License"); you may not use this file except in compliance with
+-- the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+--
+
+CREATE DATABASE IF NOT EXISTS spark_catalog.tpcds_tiny;
+
+USE spark_catalog.tpcds_tiny;
+
+--
+-- Name: catalog_sales; Type: TABLE; Tablespace:
+--
+CREATE TABLE IF NOT EXISTS catalog_sales USING parquet PARTITIONED BY (cs_sold_date_sk)
+AS SELECT * FROM tpcds.tiny.catalog_sales;
+
+--
+-- Name: catalog_returns; Type: TABLE; Tablespace:
+--
+CREATE TABLE IF NOT EXISTS catalog_returns USING parquet PARTITIONED BY (cr_returned_date_sk)
+AS SELECT * FROM tpcds.tiny.catalog_returns;
+
+--
+-- Name: inventory; Type: TABLE; Tablespace:
+--
+CREATE TABLE IF NOT EXISTS inventory USING parquet PARTITIONED BY (inv_date_sk)
+AS SELECT * FROM tpcds.tiny.inventory;
+
+--
+-- Name: store_sales; Type: TABLE; Tablespace:
+--
+CREATE TABLE IF NOT EXISTS store_sales USING parquet PARTITIONED BY (ss_sold_date_sk)
+AS SELECT * FROM tpcds.tiny.store_sales;
+
+--
+-- Name: store_returns; Type: TABLE; Tablespace:
+--
+CREATE TABLE IF NOT EXISTS store_returns USING parquet PARTITIONED BY (sr_returned_date_sk)
+AS SELECT * FROM tpcds.tiny.store_returns;
+
+--
+-- Name: web_sales; Type: TABLE; Tablespace:
+--
+CREATE TABLE IF NOT EXISTS web_sales USING parquet PARTITIONED BY (ws_sold_date_sk)
+AS SELECT * FROM tpcds.tiny.web_sales;
+
+--
+-- Name: web_returns; Type: TABLE; Tablespace:
+--
+CREATE TABLE IF NOT EXISTS web_returns USING parquet PARTITIONED BY (wr_returned_date_sk)
+AS SELECT * FROM tpcds.tiny.web_returns;
+
+--
+-- Name: call_center; Type: TABLE; Tablespace:
+--
+CREATE TABLE IF NOT EXISTS call_center USING parquet AS SELECT * FROM tpcds.tiny.call_center;
+
+--
+-- Name: catalog_page; Type: TABLE; Tablespace:
+--
+CREATE TABLE IF NOT EXISTS catalog_page USING parquet AS SELECT * FROM tpcds.tiny.catalog_page;
+
+--
+-- Name: customer; Type: TABLE; Tablespace:
+--
+CREATE TABLE IF NOT EXISTS customer USING parquet AS SELECT * FROM tpcds.tiny.customer;
+
+--
+-- Name: customer_address; Type: TABLE; Tablespace:
+--
+CREATE TABLE IF NOT EXISTS customer_address USING parquet AS SELECT * FROM tpcds.tiny.customer_address;
+
+--
+-- Name: customer_demographics; Type: TABLE; Tablespace:
+--
+CREATE TABLE IF NOT EXISTS customer_demographics USING parquet AS SELECT * FROM tpcds.tiny.customer_demographics;
+
+--
+-- Name: date_dim; Type: TABLE; Tablespace:
+--
+CREATE TABLE IF NOT EXISTS date_dim USING parquet AS SELECT * FROM tpcds.tiny.date_dim;
+
+--
+-- Name: household_demographics; Type: TABLE; Tablespace:
+--
+CREATE TABLE IF NOT EXISTS household_demographics USING parquet AS SELECT * FROM tpcds.tiny.household_demographics;
+
+--
+-- Name: income_band; Type: TABLE; Tablespace:
+--
+CREATE TABLE IF NOT EXISTS income_band USING parquet AS SELECT * FROM tpcds.tiny.income_band;
+
+--
+-- Name: item; Type: TABLE; Tablespace:
+--
+CREATE TABLE IF NOT EXISTS item USING parquet AS SELECT * FROM tpcds.tiny.item;
+
+--
+-- Name: promotion; Type: TABLE; Tablespace:
+--
+CREATE TABLE IF NOT EXISTS promotion USING parquet AS SELECT * FROM tpcds.tiny.promotion;
+
+--
+-- Name: reason; Type: TABLE; Tablespace:
+--
+CREATE TABLE IF NOT EXISTS reason USING parquet AS SELECT * FROM tpcds.tiny.reason;
+
+--
+-- Name: ship_mode; Type: TABLE; Tablespace:
+--
+CREATE TABLE IF NOT EXISTS ship_mode USING parquet AS SELECT * FROM tpcds.tiny.ship_mode;
+
+--
+-- Name: store; Type: TABLE; Tablespace:
+--
+CREATE TABLE IF NOT EXISTS store USING parquet AS SELECT * FROM tpcds.tiny.store;
+
+--
+-- Name: time_dim; Type: TABLE; Tablespace:
+--
+CREATE TABLE IF NOT EXISTS time_dim USING parquet AS SELECT * FROM tpcds.tiny.time_dim;
+
+--
+-- Name: warehouse; Type: TABLE; Tablespace:
+--
+CREATE TABLE IF NOT EXISTS warehouse USING parquet AS SELECT * FROM tpcds.tiny.warehouse;
+
+--
+-- Name: web_page; Type: TABLE; Tablespace:
+--
+CREATE TABLE IF NOT EXISTS web_page USING parquet AS SELECT * FROM tpcds.tiny.web_page;
+
+--
+-- Name: web_site; Type: TABLE; Tablespace:
+--
+CREATE TABLE IF NOT EXISTS web_site USING parquet AS SELECT * FROM tpcds.tiny.web_site;
diff --git a/extensions/spark/kyuubi-spark-connector-tpcds/src/test/scala/org/apache/kyuubi/spark/connector/tpcds/TPCDSCatalogSuite.scala b/extensions/spark/kyuubi-spark-connector-tpcds/src/test/scala/org/apache/kyuubi/spark/connector/tpcds/TPCDSCatalogSuite.scala
index 0eed970a4cd..413c54ad44f 100644
--- a/extensions/spark/kyuubi-spark-connector-tpcds/src/test/scala/org/apache/kyuubi/spark/connector/tpcds/TPCDSCatalogSuite.scala
+++ b/extensions/spark/kyuubi-spark-connector-tpcds/src/test/scala/org/apache/kyuubi/spark/connector/tpcds/TPCDSCatalogSuite.scala
@@ -27,16 +27,9 @@ import org.apache.kyuubi.KyuubiFunSuite
import org.apache.kyuubi.spark.connector.common.LocalSparkSession.withSparkSession
import org.apache.kyuubi.spark.connector.common.SparkUtils.SPARK_RUNTIME_VERSION
-class TPCDSCatalogSuite extends KyuubiFunSuite {
+class TPCDSCatalogSuite extends KyuubiFunSuite with TPCDSSuiteBase {
test("get catalog name") {
- val sparkConf = new SparkConf()
- .setMaster("local[*]")
- .set("spark.ui.enabled", "false")
- .set("spark.sql.catalogImplementation", "in-memory")
- .set("spark.sql.catalog.tpcds", classOf[TPCDSCatalog].getName)
- .set("spark.sql.cbo.enabled", "true")
- .set("spark.sql.cbo.planStats.enabled", "true")
withSparkSession(SparkSession.builder.config(sparkConf).getOrCreate()) { _ =>
val catalog = new TPCDSCatalog
val catalogName = "test"
@@ -46,13 +39,6 @@ class TPCDSCatalogSuite extends KyuubiFunSuite {
}
test("supports namespaces") {
- val sparkConf = new SparkConf()
- .setMaster("local[*]")
- .set("spark.ui.enabled", "false")
- .set("spark.sql.catalogImplementation", "in-memory")
- .set("spark.sql.catalog.tpcds", classOf[TPCDSCatalog].getName)
- .set("spark.sql.cbo.enabled", "true")
- .set("spark.sql.cbo.planStats.enabled", "true")
withSparkSession(SparkSession.builder.config(sparkConf).getOrCreate()) { spark =>
spark.sql("USE tpcds")
assert(spark.sql(s"SHOW DATABASES").collect().length == 11)
@@ -65,12 +51,10 @@ class TPCDSCatalogSuite extends KyuubiFunSuite {
"TINY,sf10" -> Seq("tiny", "sf10"),
"sf1 , " -> Seq("sf1"),
"none" -> Seq.empty[String]).foreach { case (confValue, expectedExcludeDatabases) =>
- val sparkConf = new SparkConf().setMaster("local[*]")
- .set("spark.ui.enabled", "false")
- .set("spark.sql.catalogImplementation", "in-memory")
- .set("spark.sql.catalog.tpcds", classOf[TPCDSCatalog].getName)
- .set("spark.sql.catalog.tpcds.excludeDatabases", confValue)
- withSparkSession(SparkSession.builder.config(sparkConf).getOrCreate()) { spark =>
+ val conf = sparkConf.set("spark.sql.catalog.tpcds.excludeDatabases", confValue)
+ .remove("spark.sql.cbo.enabled")
+ .remove("spark.sql.cbo.planStats.enabled")
+ withSparkSession(SparkSession.builder.config(conf).getOrCreate()) { spark =>
spark.sql("USE tpcds")
assert(
spark.sql(s"SHOW DATABASES").collect.map(_.getString(0)).sorted ===
@@ -80,13 +64,6 @@ class TPCDSCatalogSuite extends KyuubiFunSuite {
}
test("tpcds.sf1 stats") {
- val sparkConf = new SparkConf()
- .setMaster("local[*]")
- .set("spark.ui.enabled", "false")
- .set("spark.sql.catalogImplementation", "in-memory")
- .set("spark.sql.catalog.tpcds", classOf[TPCDSCatalog].getName)
- .set("spark.sql.cbo.enabled", "true")
- .set("spark.sql.cbo.planStats.enabled", "true")
withSparkSession(SparkSession.builder.config(sparkConf).getOrCreate()) { spark =>
def assertStats(tableName: String, sizeInBytes: BigInt, rowCount: BigInt): Unit = {
val stats = spark.table(tableName).queryExecution.analyzed.stats
@@ -125,13 +102,6 @@ class TPCDSCatalogSuite extends KyuubiFunSuite {
}
test("nonexistent table") {
- val sparkConf = new SparkConf()
- .setMaster("local[*]")
- .set("spark.ui.enabled", "false")
- .set("spark.sql.catalogImplementation", "in-memory")
- .set("spark.sql.catalog.tpcds", classOf[TPCDSCatalog].getName)
- .set("spark.sql.cbo.enabled", "true")
- .set("spark.sql.cbo.planStats.enabled", "true")
withSparkSession(SparkSession.builder.config(sparkConf).getOrCreate()) { spark =>
val exception = intercept[AnalysisException] {
spark.table("tpcds.sf1.nonexistent_table")
@@ -142,13 +112,6 @@ class TPCDSCatalogSuite extends KyuubiFunSuite {
}
test("tpcds.tiny count and checksum") {
- val sparkConf = new SparkConf()
- .setMaster("local[*]")
- .set("spark.ui.enabled", "false")
- .set("spark.sql.catalogImplementation", "in-memory")
- .set("spark.sql.catalog.tpcds", classOf[TPCDSCatalog].getName)
- .set("spark.sql.cbo.enabled", "true")
- .set("spark.sql.cbo.planStats.enabled", "true")
withSparkSession(SparkSession.builder.config(sparkConf).getOrCreate()) { spark =>
tableInfo.foreach {
case (table, (expectCount, expectChecksum)) =>
@@ -205,4 +168,9 @@ class TPCDSCatalogSuite extends KyuubiFunSuite {
("tpcds.tiny.web_returns", ("1152", "2464383243098")),
("tpcds.tiny.web_sales", ("11876", "25458905770096")),
("tpcds.tiny.web_site", ("2", "3798438288")))
+
+ override def sparkConf: SparkConf = {
+ super.sparkConf.set("spark.sql.cbo.enabled", "true")
+ .set("spark.sql.cbo.planStats.enabled", "true")
+ }
}
diff --git a/extensions/spark/kyuubi-spark-connector-tpcds/src/test/scala/org/apache/kyuubi/spark/connector/tpcds/TPCDSQuerySuite.scala b/extensions/spark/kyuubi-spark-connector-tpcds/src/test/scala/org/apache/kyuubi/spark/connector/tpcds/TPCDSQuerySuite.scala
index c99d7becafa..e44288bee25 100644
--- a/extensions/spark/kyuubi-spark-connector-tpcds/src/test/scala/org/apache/kyuubi/spark/connector/tpcds/TPCDSQuerySuite.scala
+++ b/extensions/spark/kyuubi-spark-connector-tpcds/src/test/scala/org/apache/kyuubi/spark/connector/tpcds/TPCDSQuerySuite.scala
@@ -40,7 +40,7 @@ import org.apache.kyuubi.spark.connector.common.LocalSparkSession.withSparkSessi
* }}}
*/
@Slow
-class TPCDSQuerySuite extends KyuubiFunSuite {
+class TPCDSQuerySuite extends KyuubiFunSuite with TPCDSSuiteBase {
val queries: Set[String] = (1 to 99).map(i => s"q$i").toSet -
("q14", "q23", "q24", "q39") +
@@ -48,13 +48,8 @@ class TPCDSQuerySuite extends KyuubiFunSuite {
test("run query on tiny") {
val viewSuffix = "view"
- val sparkConf = new SparkConf().setMaster("local[*]")
- .set("spark.ui.enabled", "false")
- .set("spark.sql.catalogImplementation", "in-memory")
- .set("spark.sql.catalog.tpcds", classOf[TPCDSCatalog].getName)
- .set("spark.sql.catalog.tpcds.useTableSchema_2_6", "true")
withSparkSession(SparkSession.builder.config(sparkConf).getOrCreate()) { spark =>
- spark.sql("USE tpcds.tiny")
+ loadTPCDSTINY(spark)
queries.map { queryName =>
val in = Utils.getContextOrKyuubiClassLoader
.getResourceAsStream(s"kyuubi/tpcds_3.2/$queryName.sql")
@@ -79,4 +74,12 @@ class TPCDSQuerySuite extends KyuubiFunSuite {
}
}
}
+
+ override def sparkConf: SparkConf = {
+ super.sparkConf.set("spark.sql.catalog.tpcds.useTableSchema_2_6", "true")
+ }
+
+ def loadTPCDSTINY(sc: SparkSession): Unit = {
+ sc.sql("USE tpcds.tiny")
+ }
}
diff --git a/extensions/spark/kyuubi-spark-connector-tpcds/src/test/scala/org/apache/kyuubi/spark/connector/tpcds/TPCDSSuiteBase.scala b/extensions/spark/kyuubi-spark-connector-tpcds/src/test/scala/org/apache/kyuubi/spark/connector/tpcds/TPCDSSuiteBase.scala
new file mode 100644
index 00000000000..7bcb12a73a5
--- /dev/null
+++ b/extensions/spark/kyuubi-spark-connector-tpcds/src/test/scala/org/apache/kyuubi/spark/connector/tpcds/TPCDSSuiteBase.scala
@@ -0,0 +1,29 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.kyuubi.spark.connector.tpcds
+
+import org.apache.spark.SparkConf
+
+trait TPCDSSuiteBase {
+ def sparkConf: SparkConf = {
+ new SparkConf().setMaster("local[*]")
+ .set("spark.ui.enabled", "false")
+ .set("spark.sql.catalogImplementation", "in-memory")
+ .set("spark.sql.catalog.tpcds", classOf[TPCDSCatalog].getName)
+ }
+}
diff --git a/extensions/spark/kyuubi-spark-connector-tpcds/src/test/scala/org/apache/kyuubi/spark/connector/tpcds/TPCDSTableSuite.scala b/extensions/spark/kyuubi-spark-connector-tpcds/src/test/scala/org/apache/kyuubi/spark/connector/tpcds/TPCDSTableSuite.scala
index f948ec4efd1..f93450f0bd6 100644
--- a/extensions/spark/kyuubi-spark-connector-tpcds/src/test/scala/org/apache/kyuubi/spark/connector/tpcds/TPCDSTableSuite.scala
+++ b/extensions/spark/kyuubi-spark-connector-tpcds/src/test/scala/org/apache/kyuubi/spark/connector/tpcds/TPCDSTableSuite.scala
@@ -19,7 +19,6 @@ package org.apache.kyuubi.spark.connector.tpcds
import io.trino.tpcds.Table
import io.trino.tpcds.generator.CallCenterGeneratorColumn
-import org.apache.spark.SparkConf
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.execution.datasources.v2.BatchScanExec
@@ -27,16 +26,12 @@ import org.apache.kyuubi.KyuubiFunSuite
import org.apache.kyuubi.spark.connector.common.LocalSparkSession.withSparkSession
import org.apache.kyuubi.spark.connector.tpcds.TPCDSConf._
-class TPCDSTableSuite extends KyuubiFunSuite {
+class TPCDSTableSuite extends KyuubiFunSuite with TPCDSSuiteBase {
test("useAnsiStringType (true, false)") {
Seq(true, false).foreach(key => {
- val sparkConf = new SparkConf().setMaster("local[*]")
- .set("spark.ui.enabled", "false")
- .set("spark.sql.catalogImplementation", "in-memory")
- .set("spark.sql.catalog.tpcds", classOf[TPCDSCatalog].getName)
- .set("spark.sql.catalog.tpcds.useAnsiStringType", key.toString)
- withSparkSession(SparkSession.builder.config(sparkConf).getOrCreate()) { spark =>
+ val conf = sparkConf.set("spark.sql.catalog.tpcds.useAnsiStringType", key.toString)
+ withSparkSession(SparkSession.builder.config(conf).getOrCreate()) { spark =>
val rows = spark.sql("desc tpcds.sf1.call_center").collect()
rows.foreach(row => {
val dataType = row.getString(1)
@@ -63,10 +58,6 @@ class TPCDSTableSuite extends KyuubiFunSuite {
test("test nullable column") {
TPCDSSchemaUtils.BASE_TABLES.foreach { tpcdsTable =>
val tableName = tpcdsTable.getName
- val sparkConf = new SparkConf().setMaster("local[*]")
- .set("spark.ui.enabled", "false")
- .set("spark.sql.catalogImplementation", "in-memory")
- .set("spark.sql.catalog.tpcds", classOf[TPCDSCatalog].getName)
withSparkSession(SparkSession.builder.config(sparkConf).getOrCreate()) { spark =>
val sparkTable = spark.table(s"tpcds.sf1.$tableName")
var notNullBitMap = 0
@@ -125,14 +116,10 @@ class TPCDSTableSuite extends KyuubiFunSuite {
test("test maxPartitionBytes") {
val maxPartitionBytes: Long = 1 * 1024 * 1024L
- val sparkConf = new SparkConf().setMaster("local[*]")
- .set("spark.ui.enabled", "false")
- .set("spark.sql.catalogImplementation", "in-memory")
- .set("spark.sql.catalog.tpcds", classOf[TPCDSCatalog].getName)
- .set(
- s"$TPCDS_CONNECTOR_READ_CONF_PREFIX.$MAX_PARTITION_BYTES_CONF",
- String.valueOf(maxPartitionBytes))
- withSparkSession(SparkSession.builder.config(sparkConf).getOrCreate()) { spark =>
+ val conf = sparkConf.set(
+ s"$TPCDS_CONNECTOR_READ_CONF_PREFIX.$MAX_PARTITION_BYTES_CONF",
+ String.valueOf(maxPartitionBytes))
+ withSparkSession(SparkSession.builder.config(conf).getOrCreate()) { spark =>
val tableName = "catalog_returns"
val table = Table.getTable(tableName)
val scale = 100
diff --git a/extensions/spark/kyuubi-spark-connector-tpcds/src/test/scala/org/apache/kyuubi/spark/connector/tpcds/gluten/GlutenTPCDSQuerySuite.scala b/extensions/spark/kyuubi-spark-connector-tpcds/src/test/scala/org/apache/kyuubi/spark/connector/tpcds/gluten/GlutenTPCDSQuerySuite.scala
new file mode 100644
index 00000000000..609d2fbbc3a
--- /dev/null
+++ b/extensions/spark/kyuubi-spark-connector-tpcds/src/test/scala/org/apache/kyuubi/spark/connector/tpcds/gluten/GlutenTPCDSQuerySuite.scala
@@ -0,0 +1,56 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.kyuubi.spark.connector.tpcds.gluten
+
+import scala.io.{Codec, Source}
+
+import org.apache.spark.SparkConf
+import org.apache.spark.sql.SparkSession
+import org.scalatest.tags.Slow
+
+import org.apache.kyuubi.{GlutenSuiteMixin, Utils}
+import org.apache.kyuubi.spark.connector.tpcds.TPCDSQuerySuite
+import org.apache.kyuubi.tags.GlutenTest
+
+@Slow
+@GlutenTest
+class GlutenTPCDSQuerySuite extends TPCDSQuerySuite with GlutenSuiteMixin {
+
+ // TODO: the full TPC-DS run takes over six hours, so the query set is trimmed
+ override val queries: Set[String] = (1 to 99).map(i => s"q$i").toSet -
+ ("q14", "q23", "q24", "q39") +
+ ("q14a", "q14b", "q23a", "q23b", "q24a", "q24b", "q39a", "q39b") -
+ // TODO: fix the Gluten TPC-DS queries excluded below
+ ("q1", "q4", "q7", "q11", "q12", "q17", "q20", "q21", "q25", "q26", "q29", "q30", "q34", "q37",
+ "q39a", "q39b", "q40", "q43", "q46", "q49", "q56", "q58", "q59", "q60", "q68", "q73", "q74",
+ "q78", "q79", "q81", "q82", "q83", "q84", "q91", "q98")
+ override def sparkConf: SparkConf = {
+ val glutenConf = super.sparkConf
+ extraConfigs.foreach { case (k, v) => glutenConf.set(k, v) }
+ glutenConf
+ }
+
+ override def loadTPCDSTINY(sc: SparkSession): Unit = {
+ val in = Utils.getContextOrKyuubiClassLoader.getResourceAsStream("kyuubi/load-tpcds-tiny.sql")
+ val queryContent: String = Source.fromInputStream(in)(Codec.UTF8).mkString
+ in.close()
+ queryContent.split(";\n").filterNot(_.trim.isEmpty).foreach { sql =>
+ sc.sql(sql)
+ }
+ }
+}
diff --git a/extensions/spark/kyuubi-spark-connector-tpch/pom.xml b/extensions/spark/kyuubi-spark-connector-tpch/pom.xml
index 22a5405a6a0..611214655fd 100644
--- a/extensions/spark/kyuubi-spark-connector-tpch/pom.xml
+++ b/extensions/spark/kyuubi-spark-connector-tpch/pom.xml
@@ -219,4 +219,29 @@
         <outputDirectory>target/scala-${scala.binary.version}/classes</outputDirectory>
         <testOutputDirectory>target/scala-${scala.binary.version}/test-classes</testOutputDirectory>
     </build>
+
+    <profiles>
+        <profile>
+            <id>gluten</id>
+            <properties>
+                <maven.plugin.scalatest.include.tags>org.apache.kyuubi.tags.GlutenTest</maven.plugin.scalatest.include.tags>
+                <spark.version>3.4.1</spark.version>
+                <spark.binary.version>3.4</spark.binary.version>
+            </properties>
+            <dependencies>
+                <dependency>
+                    <groupId>io.glutenproject</groupId>
+                    <artifactId>gluten-velox-bundle-spark3.4_2.12-ubuntu_22.04</artifactId>
+                    <version>1.1.0-SNAPSHOT</version>
+                    <scope>system</scope>
+                    <systemPath>${project.basedir}/../../../gluten/package/target/gluten-velox-bundle-spark3.4_2.12-ubuntu_22.04-1.1.0-SNAPSHOT.jar</systemPath>
+                </dependency>
+                <dependency>
+                    <groupId>org.apache.spark</groupId>
+                    <artifactId>spark-hive_2.12</artifactId>
+                    <scope>test</scope>
+                </dependency>
+            </dependencies>
+        </profile>
+    </profiles>
 </project>
diff --git a/extensions/spark/kyuubi-spark-connector-tpch/src/main/resources/kyuubi/load-tpch-tiny.sql b/extensions/spark/kyuubi-spark-connector-tpch/src/main/resources/kyuubi/load-tpch-tiny.sql
new file mode 100644
index 00000000000..8f2228f549c
--- /dev/null
+++ b/extensions/spark/kyuubi-spark-connector-tpch/src/main/resources/kyuubi/load-tpch-tiny.sql
@@ -0,0 +1,59 @@
+-- Licensed to the Apache Software Foundation (ASF) under one or more
+-- contributor license agreements. See the NOTICE file distributed with
+-- this work for additional information regarding copyright ownership.
+-- The ASF licenses this file to You under the Apache License, Version 2.0
+-- (the "License"); you may not use this file except in compliance with
+-- the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+--
+
+CREATE DATABASE IF NOT EXISTS spark_catalog.tpch_tiny;
+
+USE spark_catalog.tpch_tiny;
+
+--
+-- Name: customer; Type: TABLE; Tablespace:
+--
+CREATE TABLE IF NOT EXISTS customer USING parquet AS SELECT * FROM tpch.tiny.customer;
+
+--
+-- Name: orders; Type: TABLE; Tablespace:
+--
+CREATE TABLE IF NOT EXISTS orders USING parquet AS SELECT * FROM tpch.tiny.orders;
+
+--
+-- Name: lineitem; Type: TABLE; Tablespace:
+--
+CREATE TABLE IF NOT EXISTS lineitem USING parquet AS SELECT * FROM tpch.tiny.lineitem;
+
+--
+-- Name: part; Type: TABLE; Tablespace:
+--
+CREATE TABLE IF NOT EXISTS part USING parquet AS SELECT * FROM tpch.tiny.part;
+
+--
+-- Name: partsupp; Type: TABLE; Tablespace:
+--
+CREATE TABLE IF NOT EXISTS partsupp USING parquet AS SELECT * FROM tpch.tiny.partsupp;
+
+--
+-- Name: supplier; Type: TABLE; Tablespace:
+--
+CREATE TABLE IF NOT EXISTS supplier USING parquet AS SELECT * FROM tpch.tiny.supplier;
+
+--
+-- Name: nation; Type: TABLE; Tablespace:
+--
+CREATE TABLE IF NOT EXISTS nation USING parquet AS SELECT * FROM tpch.tiny.nation;
+
+--
+-- Name: region; Type: TABLE; Tablespace:
+--
+CREATE TABLE IF NOT EXISTS region USING parquet AS SELECT * FROM tpch.tiny.region;
diff --git a/extensions/spark/kyuubi-spark-connector-tpch/src/test/scala/org/apache/kyuubi/spark/connector/tpch/TPCHCatalogSuite.scala b/extensions/spark/kyuubi-spark-connector-tpch/src/test/scala/org/apache/kyuubi/spark/connector/tpch/TPCHCatalogSuite.scala
index 14415141e63..0469d385b5b 100644
--- a/extensions/spark/kyuubi-spark-connector-tpch/src/test/scala/org/apache/kyuubi/spark/connector/tpch/TPCHCatalogSuite.scala
+++ b/extensions/spark/kyuubi-spark-connector-tpch/src/test/scala/org/apache/kyuubi/spark/connector/tpch/TPCHCatalogSuite.scala
@@ -25,16 +25,9 @@ import org.apache.kyuubi.KyuubiFunSuite
import org.apache.kyuubi.spark.connector.common.LocalSparkSession.withSparkSession
import org.apache.kyuubi.spark.connector.common.SparkUtils.SPARK_RUNTIME_VERSION
-class TPCHCatalogSuite extends KyuubiFunSuite {
+class TPCHCatalogSuite extends KyuubiFunSuite with TPCHSuiteBase {
test("get catalog name") {
- val sparkConf = new SparkConf()
- .setMaster("local[*]")
- .set("spark.ui.enabled", "false")
- .set("spark.sql.catalogImplementation", "in-memory")
- .set("spark.sql.catalog.tpch", classOf[TPCHCatalog].getName)
- .set("spark.sql.cbo.enabled", "true")
- .set("spark.sql.cbo.planStats.enabled", "true")
withSparkSession(SparkSession.builder.config(sparkConf).getOrCreate()) { _ =>
val catalog = new TPCHCatalog
val catalogName = "test"
@@ -44,13 +37,6 @@ class TPCHCatalogSuite extends KyuubiFunSuite {
}
test("supports namespaces") {
- val sparkConf = new SparkConf()
- .setMaster("local[*]")
- .set("spark.ui.enabled", "false")
- .set("spark.sql.catalogImplementation", "in-memory")
- .set("spark.sql.catalog.tpch", classOf[TPCHCatalog].getName)
- .set("spark.sql.cbo.enabled", "true")
- .set("spark.sql.cbo.planStats.enabled", "true")
withSparkSession(SparkSession.builder.config(sparkConf).getOrCreate()) { spark =>
spark.sql("USE tpch")
assert(spark.sql(s"SHOW DATABASES").collect().length == 12)
@@ -63,12 +49,10 @@ class TPCHCatalogSuite extends KyuubiFunSuite {
"TINY,sf10" -> Seq("tiny", "sf10"),
"sf1 , " -> Seq("sf1"),
"none" -> Seq.empty[String]).foreach { case (confValue, expectedExcludeDatabases) =>
- val sparkConf = new SparkConf().setMaster("local[*]")
- .set("spark.ui.enabled", "false")
- .set("spark.sql.catalogImplementation", "in-memory")
- .set("spark.sql.catalog.tpch", classOf[TPCHCatalog].getName)
- .set("spark.sql.catalog.tpch.excludeDatabases", confValue)
- withSparkSession(SparkSession.builder.config(sparkConf).getOrCreate()) { spark =>
+ val conf = sparkConf.set("spark.sql.catalog.tpch.excludeDatabases", confValue)
+ .remove("spark.sql.cbo.enabled")
+ .remove("spark.sql.cbo.planStats.enabled")
+ withSparkSession(SparkSession.builder.config(conf).getOrCreate()) { spark =>
spark.sql("USE tpch")
assert(
spark.sql(s"SHOW DATABASES").collect.map(_.getString(0)).sorted ===
@@ -78,13 +62,6 @@ class TPCHCatalogSuite extends KyuubiFunSuite {
}
test("tpch.tiny count") {
- val sparkConf = new SparkConf()
- .setMaster("local[*]")
- .set("spark.ui.enabled", "false")
- .set("spark.sql.catalogImplementation", "in-memory")
- .set("spark.sql.catalog.tpch", classOf[TPCHCatalog].getName)
- .set("spark.sql.cbo.enabled", "true")
- .set("spark.sql.cbo.planStats.enabled", "true")
withSparkSession(SparkSession.builder.config(sparkConf).getOrCreate()) { spark =>
assert(spark.table("tpch.tiny.customer").count === 1500)
assert(spark.table("tpch.tiny.orders").count === 15000)
@@ -98,13 +75,6 @@ class TPCHCatalogSuite extends KyuubiFunSuite {
}
test("tpch.sf0 count") {
- val sparkConf = new SparkConf()
- .setMaster("local[*]")
- .set("spark.ui.enabled", "false")
- .set("spark.sql.catalogImplementation", "in-memory")
- .set("spark.sql.catalog.tpch", classOf[TPCHCatalog].getName)
- .set("spark.sql.cbo.enabled", "true")
- .set("spark.sql.cbo.planStats.enabled", "true")
withSparkSession(SparkSession.builder.config(sparkConf).getOrCreate()) { spark =>
assert(spark.table("tpch.sf0.customer").count === 0)
assert(spark.table("tpch.sf0.orders").count === 0)
@@ -118,13 +88,6 @@ class TPCHCatalogSuite extends KyuubiFunSuite {
}
test("tpch.sf1 stats") {
- val sparkConf = new SparkConf()
- .setMaster("local[*]")
- .set("spark.ui.enabled", "false")
- .set("spark.sql.catalogImplementation", "in-memory")
- .set("spark.sql.catalog.tpch", classOf[TPCHCatalog].getName)
- .set("spark.sql.cbo.enabled", "true")
- .set("spark.sql.cbo.planStats.enabled", "true")
withSparkSession(SparkSession.builder.config(sparkConf).getOrCreate()) { spark =>
def assertStats(tableName: String, sizeInBytes: BigInt, rowCount: BigInt): Unit = {
val stats = spark.table(tableName).queryExecution.analyzed.stats
@@ -147,13 +110,6 @@ class TPCHCatalogSuite extends KyuubiFunSuite {
}
test("nonexistent table") {
- val sparkConf = new SparkConf()
- .setMaster("local[*]")
- .set("spark.ui.enabled", "false")
- .set("spark.sql.catalogImplementation", "in-memory")
- .set("spark.sql.catalog.tpch", classOf[TPCHCatalog].getName)
- .set("spark.sql.cbo.enabled", "true")
- .set("spark.sql.cbo.planStats.enabled", "true")
withSparkSession(SparkSession.builder.config(sparkConf).getOrCreate()) { spark =>
val exception = intercept[AnalysisException] {
spark.table("tpch.sf1.nonexistent_table")
@@ -162,4 +118,9 @@ class TPCHCatalogSuite extends KyuubiFunSuite {
|| exception.message.contains("TABLE_OR_VIEW_NOT_FOUND"))
}
}
+
+ override def sparkConf: SparkConf = {
+ super.sparkConf.set("spark.sql.cbo.enabled", "true")
+ .set("spark.sql.cbo.planStats.enabled", "true")
+ }
}
diff --git a/extensions/spark/kyuubi-spark-connector-tpch/src/test/scala/org/apache/kyuubi/spark/connector/tpch/TPCHQuerySuite.scala b/extensions/spark/kyuubi-spark-connector-tpch/src/test/scala/org/apache/kyuubi/spark/connector/tpch/TPCHQuerySuite.scala
index c651d930043..3353f5d43c7 100644
--- a/extensions/spark/kyuubi-spark-connector-tpch/src/test/scala/org/apache/kyuubi/spark/connector/tpch/TPCHQuerySuite.scala
+++ b/extensions/spark/kyuubi-spark-connector-tpch/src/test/scala/org/apache/kyuubi/spark/connector/tpch/TPCHQuerySuite.scala
@@ -20,7 +20,6 @@ package org.apache.kyuubi.spark.connector.tpch
import scala.collection.JavaConverters._
import scala.io.{Codec, Source}
-import org.apache.spark.SparkConf
import org.apache.spark.sql.SparkSession
import org.scalatest.tags.Slow
@@ -40,18 +39,14 @@ import org.apache.kyuubi.spark.connector.common.LocalSparkSession.withSparkSessi
* }}}
*/
@Slow
-class TPCHQuerySuite extends KyuubiFunSuite {
+class TPCHQuerySuite extends KyuubiFunSuite with TPCHSuiteBase {
- val queries: List[String] = (1 to 22).map(i => s"q$i").toList
+ val queries: Set[String] = (1 to 22).map(i => s"q$i").toSet
test("run query on tiny") {
val viewSuffix = "view"
- val sparkConf = new SparkConf().setMaster("local[*]")
- .set("spark.ui.enabled", "false")
- .set("spark.sql.catalogImplementation", "in-memory")
- .set("spark.sql.catalog.tpch", classOf[TPCHCatalog].getName)
withSparkSession(SparkSession.builder.config(sparkConf).getOrCreate()) { spark =>
- spark.sql("USE tpch.tiny")
+ loadTPCHTINY(spark)
queries.map { queryName =>
val in = Utils.getContextOrKyuubiClassLoader.getResourceAsStream(
s"kyuubi/tpch/$queryName.sql")
@@ -71,4 +66,8 @@ class TPCHQuerySuite extends KyuubiFunSuite {
}
}
}
+
+ def loadTPCHTINY(sc: SparkSession): Unit = {
+ sc.sql("USE tpch.tiny")
+ }
}
diff --git a/extensions/spark/kyuubi-spark-connector-tpch/src/test/scala/org/apache/kyuubi/spark/connector/tpch/TPCHSuiteBase.scala b/extensions/spark/kyuubi-spark-connector-tpch/src/test/scala/org/apache/kyuubi/spark/connector/tpch/TPCHSuiteBase.scala
new file mode 100644
index 00000000000..4ec24923488
--- /dev/null
+++ b/extensions/spark/kyuubi-spark-connector-tpch/src/test/scala/org/apache/kyuubi/spark/connector/tpch/TPCHSuiteBase.scala
@@ -0,0 +1,29 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.kyuubi.spark.connector.tpch
+
+import org.apache.spark.SparkConf
+
+trait TPCHSuiteBase {
+ def sparkConf: SparkConf = {
+ new SparkConf().setMaster("local[*]")
+ .set("spark.ui.enabled", "false")
+ .set("spark.sql.catalogImplementation", "in-memory")
+ .set("spark.sql.catalog.tpch", classOf[TPCHCatalog].getName)
+ }
+}
diff --git a/extensions/spark/kyuubi-spark-connector-tpch/src/test/scala/org/apache/kyuubi/spark/connector/tpch/gluten/GlutenTPCHQuerySuite.scala b/extensions/spark/kyuubi-spark-connector-tpch/src/test/scala/org/apache/kyuubi/spark/connector/tpch/gluten/GlutenTPCHQuerySuite.scala
new file mode 100644
index 00000000000..aae00efd828
--- /dev/null
+++ b/extensions/spark/kyuubi-spark-connector-tpch/src/test/scala/org/apache/kyuubi/spark/connector/tpch/gluten/GlutenTPCHQuerySuite.scala
@@ -0,0 +1,51 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.kyuubi.spark.connector.tpch.gluten
+
+import scala.io.{Codec, Source}
+
+import org.apache.spark.SparkConf
+import org.apache.spark.sql.SparkSession
+import org.scalatest.tags.Slow
+
+import org.apache.kyuubi.{GlutenSuiteMixin, Utils}
+import org.apache.kyuubi.spark.connector.tpch.TPCHQuerySuite
+import org.apache.kyuubi.tags.GlutenTest
+
+@Slow
+@GlutenTest
+class GlutenTPCHQuerySuite extends TPCHQuerySuite with GlutenSuiteMixin {
+ // TODO: Fix the inconsistency in q9 results.
+ override val queries: Set[String] = (1 to 22).map(i => s"q$i").toSet - "q9"
+
+ override def sparkConf: SparkConf = {
+ val glutenConf = super.sparkConf
+ extraConfigs.foreach { case (k, v) => glutenConf.set(k, v) }
+ glutenConf
+ }
+
+ override def loadTPCHTINY(sc: SparkSession): Unit = {
+ val in = Utils.getContextOrKyuubiClassLoader.getResourceAsStream("kyuubi/load-tpch-tiny.sql")
+ val queryContent: String = Source.fromInputStream(in)(Codec.UTF8).mkString
+ in.close()
+ queryContent.split(";\n").filterNot(_.trim.isEmpty).foreach { sql =>
+ sc.sql(sql)
+ }
+ }
+
+}
diff --git a/kyuubi-common/src/test/scala/org/apache/kyuubi/GlutenSuiteMixin.scala b/kyuubi-common/src/test/scala/org/apache/kyuubi/GlutenSuiteMixin.scala
new file mode 100644
index 00000000000..6c17c5b9392
--- /dev/null
+++ b/kyuubi-common/src/test/scala/org/apache/kyuubi/GlutenSuiteMixin.scala
@@ -0,0 +1,33 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.kyuubi
+
+trait GlutenSuiteMixin {
+ // Locate the Gluten Velox bundle jar on the test classpath; the gluten
+ // Maven profile provides it as a system-scoped dependency.
+ protected def extraJars: String = {
+ System.getProperty("java.class.path")
+ .split(":")
+ .filter(_.contains("gluten-velox-bundle-spark")).head
+ }
+
+ // Gluten settings mirroring docs/deployment/spark/gluten.md, plus the bundle jar
+ protected def extraConfigs = Map(
+ "spark.plugins" -> "io.glutenproject.GlutenPlugin",
+ "spark.memory.offHeap.size" -> "20g",
+ "spark.memory.offHeap.enabled" -> "true",
+ "spark.shuffle.manager" -> "org.apache.spark.shuffle.sort.ColumnarShuffleManager",
+ "spark.jars" -> extraJars)
+}
diff --git a/kyuubi-util-scala/src/test/java/org/apache/kyuubi/tags/GlutenTest.java b/kyuubi-util-scala/src/test/java/org/apache/kyuubi/tags/GlutenTest.java
new file mode 100644
index 00000000000..8620df4b95a
--- /dev/null
+++ b/kyuubi-util-scala/src/test/java/org/apache/kyuubi/tags/GlutenTest.java
@@ -0,0 +1,29 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.kyuubi.tags;
+
+import java.lang.annotation.ElementType;
+import java.lang.annotation.Retention;
+import java.lang.annotation.RetentionPolicy;
+import java.lang.annotation.Target;
+import org.scalatest.TagAnnotation;
+
+@TagAnnotation
+@Retention(RetentionPolicy.RUNTIME)
+@Target({ElementType.METHOD, ElementType.TYPE})
+public @interface GlutenTest {}
diff --git a/pom.xml b/pom.xml
index b7ba11018e1..b25370bd66b 100644
--- a/pom.xml
+++ b/pom.xml
@@ -236,7 +236,7 @@
1.12.1
4.8.0
2.2.0
-        <maven.plugin.scalatest.exclude.tags>org.scalatest.tags.Slow,org.apache.kyuubi.tags.IcebergTest,org.apache.kyuubi.tags.DeltaTest,org.apache.kyuubi.tags.HudiTest</maven.plugin.scalatest.exclude.tags>
+        <maven.plugin.scalatest.exclude.tags>org.scalatest.tags.Slow,org.apache.kyuubi.tags.GlutenTest,org.apache.kyuubi.tags.IcebergTest,org.apache.kyuubi.tags.DeltaTest,org.apache.kyuubi.tags.HudiTest</maven.plugin.scalatest.exclude.tags>
false
2.30.0
@@ -2231,7 +2231,7 @@
1.3.1
spark-${spark.version}-bin-hadoop3.2.tgz
-        <maven.plugin.scalatest.exclude.tags>org.scalatest.tags.Slow</maven.plugin.scalatest.exclude.tags>
+        <maven.plugin.scalatest.exclude.tags>org.scalatest.tags.Slow,org.apache.kyuubi.tags.GlutenTest</maven.plugin.scalatest.exclude.tags>
@@ -2247,7 +2247,7 @@
delta-core
2.0.2
spark-${spark.version}-bin-hadoop3.2${spark.archive.scala.suffix}.tgz
-        <maven.plugin.scalatest.exclude.tags>org.scalatest.tags.Slow</maven.plugin.scalatest.exclude.tags>
+        <maven.plugin.scalatest.exclude.tags>org.scalatest.tags.Slow,org.apache.kyuubi.tags.GlutenTest</maven.plugin.scalatest.exclude.tags>
@@ -2263,7 +2263,7 @@
delta-core
3.3.3
3.3
-        <maven.plugin.scalatest.exclude.tags>org.scalatest.tags.Slow</maven.plugin.scalatest.exclude.tags>
+        <maven.plugin.scalatest.exclude.tags>org.scalatest.tags.Slow,org.apache.kyuubi.tags.GlutenTest</maven.plugin.scalatest.exclude.tags>
@@ -2278,7 +2278,7 @@
2.4.0
3.4.1
3.4
-        <maven.plugin.scalatest.exclude.tags>org.scalatest.tags.Slow</maven.plugin.scalatest.exclude.tags>
+        <maven.plugin.scalatest.exclude.tags>org.scalatest.tags.Slow,org.apache.kyuubi.tags.GlutenTest</maven.plugin.scalatest.exclude.tags>
@@ -2296,7 +2296,7 @@
3.4
3.5.0
3.5
-        <maven.plugin.scalatest.exclude.tags>org.scalatest.tags.Slow,org.apache.kyuubi.tags.DeltaTest,org.apache.kyuubi.tags.PySparkTest,org.apache.kyuubi.tags.PaimonTest</maven.plugin.scalatest.exclude.tags>
+        <maven.plugin.scalatest.exclude.tags>org.scalatest.tags.Slow,org.apache.kyuubi.tags.GlutenTest,org.apache.kyuubi.tags.DeltaTest,org.apache.kyuubi.tags.PySparkTest,org.apache.kyuubi.tags.PaimonTest</maven.plugin.scalatest.exclude.tags>
@@ -2304,7 +2304,7 @@
spark-master
4.0.0-SNAPSHOT
-        <maven.plugin.scalatest.exclude.tags>org.scalatest.tags.Slow,org.apache.kyuubi.tags.DeltaTest,org.apache.kyuubi.tags.IcebergTest,org.apache.kyuubi.tags.DeltaTest,org.apache.kyuubi.tags.HudiTest,org.apache.kyuubi.tags.PySparkTest</maven.plugin.scalatest.exclude.tags>
+        <maven.plugin.scalatest.exclude.tags>org.scalatest.tags.Slow,org.apache.kyuubi.tags.GlutenTest,org.apache.kyuubi.tags.DeltaTest,org.apache.kyuubi.tags.IcebergTest,org.apache.kyuubi.tags.HudiTest,org.apache.kyuubi.tags.PySparkTest</maven.plugin.scalatest.exclude.tags>