From 7a3f93135276b438ba77bed9978b070a29e342a9 Mon Sep 17 00:00:00 2001
From: Liang-Chi Hsieh
Date: Wed, 10 Jul 2024 12:58:22 -0700
Subject: [PATCH] test: Run optimized version of q72 derived from TPC-DS

---
 Reviewer notes (this region follows the `---` separator, so it is not part
 of the commit message):

  * Adds the resource `tpcds-extended/q72.sql` (26 lines): an aggregate over
    catalog_sales joined with date_dim (three aliases), customer_demographics,
    household_demographics, item, inventory, warehouse, and left-outer-joined
    with promotion and catalog_returns, ordered and limited to 100 rows.
  * Adds `tpcdsExtendedQueries = Seq("q72")` to CometTPCDSQueryTestSuite and
    registers one test per entry, named "extended <name>", inside the branch
    that the context lines show is paired with the
    "skipped because env `SPARK_TPCDS_DATA` is not set" fallback.
  * Each registered test loads the SQL text from the context class loader and
    calls `runQuery(queryString, goldenFile, conf)` once per element of
    `joinConfs`, with `System.gc()` before each run (tagged SPARK-37368).
  * The golden file path is `$baseResourcePath/extended/<name>.sql.out`.
    NOTE(review): the diffstat below lists only two files and neither is
    `q72.sql.out`; confirm the golden file is added separately or is meant to
    be produced with SPARK_GENERATE_GOLDEN_FILES=1 (presumably the regenerate
    switch read into `regenGoldenFiles`; its use is outside this patch).

 .../src/test/resources/tpcds-extended/q72.sql | 26 +++++++++++++++++++
 .../spark/sql/CometTPCDSQueryTestSuite.scala  | 15 +++++++++++
 2 files changed, 41 insertions(+)
 create mode 100644 spark/src/test/resources/tpcds-extended/q72.sql

diff --git a/spark/src/test/resources/tpcds-extended/q72.sql b/spark/src/test/resources/tpcds-extended/q72.sql
new file mode 100644
index 0000000000..4c95c8aa7a
--- /dev/null
+++ b/spark/src/test/resources/tpcds-extended/q72.sql
@@ -0,0 +1,26 @@
+select i_item_desc
+      ,w_warehouse_name
+      ,d1.d_week_seq
+      ,sum(case when p_promo_sk is null then 1 else 0 end) no_promo
+      ,sum(case when p_promo_sk is not null then 1 else 0 end) promo
+      ,count(*) total_cnt
+from catalog_sales
+join date_dim d1 on (cs_sold_date_sk = d1.d_date_sk)
+join customer_demographics on (cs_bill_cdemo_sk = cd_demo_sk)
+join household_demographics on (cs_bill_hdemo_sk = hd_demo_sk)
+join item on (i_item_sk = cs_item_sk)
+join inventory on (cs_item_sk = inv_item_sk)
+join warehouse on (w_warehouse_sk=inv_warehouse_sk)
+join date_dim d2 on (inv_date_sk = d2.d_date_sk)
+join date_dim d3 on (cs_ship_date_sk = d3.d_date_sk)
+left outer join promotion on (cs_promo_sk=p_promo_sk)
+left outer join catalog_returns on (cr_item_sk = cs_item_sk and cr_order_number = cs_order_number)
+where d1.d_week_seq = d2.d_week_seq
+  and inv_quantity_on_hand < cs_quantity
+  and d3.d_date > d1.d_date + 5
+  and hd_buy_potential = '501-1000'
+  and d1.d_year = 1999
+  and cd_marital_status = 'S'
+group by i_item_desc,w_warehouse_name,d1.d_week_seq
+order by total_cnt desc, i_item_desc, w_warehouse_name, d_week_seq
+LIMIT 100
diff --git a/spark/src/test/scala/org/apache/spark/sql/CometTPCDSQueryTestSuite.scala b/spark/src/test/scala/org/apache/spark/sql/CometTPCDSQueryTestSuite.scala
index c2b853515a..6aec042af9 100644
--- a/spark/src/test/scala/org/apache/spark/sql/CometTPCDSQueryTestSuite.scala
+++ b/spark/src/test/scala/org/apache/spark/sql/CometTPCDSQueryTestSuite.scala
@@ -35,6 +35,8 @@ import org.apache.spark.sql.test.TestSparkSession
  */
 class CometTPCDSQueryTestSuite extends QueryTest with TPCDSBase with CometSQLQueryTestHelper {
 
+  val tpcdsExtendedQueries: Seq[String] = Seq("q72")
+
   private val tpcdsDataPath = sys.env.get("SPARK_TPCDS_DATA")
 
   private val regenGoldenFiles: Boolean = System.getenv("SPARK_GENERATE_GOLDEN_FILES") == "1"
@@ -224,6 +226,19 @@ class CometTPCDSQueryTestSuite extends QueryTest with TPCDSBase with CometSQLQue
         }
       }
     }
+
+    tpcdsExtendedQueries.foreach { name =>
+      val queryString = resourceToString(
+        s"tpcds-extended/$name.sql",
+        classLoader = Thread.currentThread().getContextClassLoader)
+      test(s"extended $name") {
+        val goldenFile = new File(s"$baseResourcePath/extended", s"$name.sql.out")
+        joinConfs.foreach { conf =>
+          System.gc() // SPARK-37368
+          runQuery(queryString, goldenFile, conf)
+        }
+      }
+    }
   } else {
     ignore("skipped because env `SPARK_TPCDS_DATA` is not set") {}
   }