Skip to content

Commit

Permalink
build: Add CI for TPC-H queries (apache#220)
Browse files Browse the repository at this point in the history
* build: Add CI for TPC-H queries

* Add tpch generated data to the excluded list of license check.
  • Loading branch information
viirya authored Mar 22, 2024
1 parent 9e59732 commit 4329838
Show file tree
Hide file tree
Showing 2 changed files with 124 additions and 0 deletions.
123 changes: 123 additions & 0 deletions .github/workflows/benchmark-tpch.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Workflow that builds Comet and runs the TPC-H query suite against
# generated SF=1 data.
name: TPC-H Correctness

# One concurrency group per repository / branch-or-commit / workflow; a newer
# run in the same group cancels the one that is still in progress.
concurrency:
  group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }}
  cancel-in-progress: true

# Triggers: pushes and pull requests that touch anything other than docs or
# Markdown files, plus manual dispatch.
on:
  push:
    paths-ignore:
      - 'doc/**'
      - '**.md'
  pull_request:
    paths-ignore:
      - 'doc/**'
      - '**.md'
  # manual trigger
  # https://docs.github.com/en/actions/managing-workflow-runs/manually-running-a-workflow
  workflow_dispatch:

# Workflow-level environment; read by the setup-builder steps through
# `env.RUST_VERSION`.
env:
  RUST_VERSION: nightly

jobs:
  # Builds the native Comet library, uploads it as a short-lived artifact and
  # makes sure the TPC-H (SF=1) data set exists in the cache.
  prepare:
    name: Build native and prepare data
    runs-on: ubuntu-latest
    container:
      image: amd64/rust
      # NOTE(review): nesting was reconstructed; `env` is assumed to belong to
      # `container` rather than to the job itself - confirm.
      env:
        JAVA_VERSION: "11"
    steps:
      - uses: actions/checkout@v4
      - name: Setup Rust & Java toolchain
        uses: ./.github/actions/setup-builder
        with:
          rust-version: ${{ env.RUST_VERSION }}
          jdk-version: "11"
      - name: Cache Maven dependencies
        uses: actions/cache@v4
        with:
          path: |
            ~/.m2/repository
            /root/.m2/repository
          key: ${{ runner.os }}-java-maven-${{ hashFiles('**/pom.xml') }}
          restore-keys: |
            ${{ runner.os }}-java-maven-
      # The cache key is derived from this workflow file, so editing the
      # workflow invalidates the cached data set.
      - name: Cache TPC-H generated data
        id: cache-tpch-sf-1
        uses: actions/cache@v4
        with:
          path: ./tpch
          key: tpch-${{ hashFiles('.github/workflows/benchmark-tpch.yml') }}
      - name: Build Comet
        run: make release
      - name: Upload Comet native lib
        uses: actions/upload-artifact@v4
        with:
          name: libcomet-${{ github.run_id }}
          path: |
            core/target/release/libcomet.so
            core/target/release/libcomet.dylib
          retention-days: 1  # remove the artifact after 1 day, only valid for this workflow
          overwrite: true
      # Only runs on a cache miss. The generator is started from `spark/` and
      # told to write to the parent directory (the checkout root).
      - name: Generate TPC-H (SF=1) table data
        if: steps.cache-tpch-sf-1.outputs.cache-hit != 'true'
        run: |
          cd spark && MAVEN_OPTS='-Xmx20g' ../mvnw exec:java \
            -Dexec.mainClass="org.apache.spark.sql.GenTPCHData" \
            -Dexec.classpathScope="test" \
            -Dexec.cleanupDaemonThreads="false" \
            -Dexec.args="--location $(pwd)/.. --scaleFactor 1 --numPartitions 1 --overwrite"

  # Restores the cached data and the native library produced by `prepare`,
  # then runs the TPC-H query suite.
  benchmark:
    name: Run TPCHQuerySuite
    runs-on: ubuntu-latest
    needs: [prepare]
    container:
      image: amd64/rust
    steps:
      - uses: actions/checkout@v4
      - name: Setup Rust & Java toolchain
        uses: ./.github/actions/setup-builder
        with:
          rust-version: ${{ env.RUST_VERSION }}
          jdk-version: "11"
      - name: Cache Maven dependencies
        uses: actions/cache@v4
        with:
          path: |
            ~/.m2/repository
            /root/.m2/repository
          key: ${{ runner.os }}-java-maven-${{ hashFiles('**/pom.xml') }}
          restore-keys: |
            ${{ runner.os }}-java-maven-
      - name: Restore TPC-H generated data
        id: cache-tpch-sf-1
        uses: actions/cache/restore@v4
        with:
          path: ./tpch
          key: tpch-${{ hashFiles('.github/workflows/benchmark-tpch.yml') }}
          # The data should always be cached at this point: the `prepare` job
          # generates it when it does not exist yet.
          fail-on-cache-miss: true
      - name: Download Comet native lib
        uses: actions/download-artifact@v4
        with:
          name: libcomet-${{ github.run_id }}
          path: core/target/release
      - name: Run TPC-H queries
        run: |
          SPARK_HOME="$(pwd)" SPARK_TPCH_DATA="$(pwd)/tpch/sf1_parquet" \
            ./mvnw -B -Prelease -Dsuites=org.apache.spark.sql.CometTPCHQuerySuite test
1 change: 1 addition & 0 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -888,6 +888,7 @@ under the License.
<exclude>**/inspections/*.txt</exclude>
<exclude>tpcds-kit/**</exclude>
<exclude>tpcds-sf-1/**</exclude>
<exclude>tpch/**</exclude>
</excludes>
</configuration>
</plugin>
Expand Down

0 comments on commit 4329838

Please sign in to comment.