Skip to content

[GLUTEN-7950][VL] Keep Core module's build flag consistent with Velox #1682

[GLUTEN-7950][VL] Keep Core module's build flag consistent with Velox

[GLUTEN-7950][VL] Keep Core module's build flag consistent with Velox #1682

Workflow file for this run

# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
name: Velox backend Github Runner
on:
pull_request:
paths:
- '.github/workflows/velox_backend.yml'
- 'pom.xml'
- 'backends-velox/**'
- 'gluten-uniffle/**'
- 'gluten-celeborn/common/**'
- 'gluten-celeborn/package/**'
- 'gluten-celeborn/velox/**'
- 'gluten-ras/**'
- 'gluten-core/**'
- 'gluten-substrait/**'
- 'gluten-arrow/**'
- 'gluten-delta/**'
- 'gluten-iceberg/**'
- 'gluten-hudi/**'
- 'gluten-ut/**'
- 'shims/**'
- 'tools/gluten-it/**'
- 'ep/build-velox/**'
- 'cpp/*'
- 'cpp/CMake/**'
- 'cpp/velox/**'
- 'cpp/core/**'
- 'dev/**'
env:
ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true
MVN_CMD: 'mvn -ntp'
WGET_CMD: 'wget -nv'
SETUP: 'bash .github/workflows/util/setup_helper.sh'
CCACHE_DIR: "${{ github.workspace }}/.ccache"
concurrency:
group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }}
cancel-in-progress: true
jobs:
build-native-lib-centos-7:
runs-on: ubuntu-20.04
container: apache/gluten:vcpkg-centos-7
steps:
- uses: actions/checkout@v2
- name: Get Ccache
uses: actions/cache/restore@v3
with:
path: '${{ env.CCACHE_DIR }}'
key: ccache-centos7-release-default-${{github.sha}}
restore-keys: |
ccache-centos7-release-default
- name: Build Gluten native libraries
run: |
df -a
cd $GITHUB_WORKSPACE/
bash dev/ci-velox-buildstatic-centos-7.sh
ccache -s
- name: "Save ccache"
uses: actions/cache/save@v3
id: ccache
with:
path: '${{ env.CCACHE_DIR }}'
key: ccache-centos7-release-default-${{github.sha}}
- uses: actions/upload-artifact@v3
with:
name: velox-native-lib-centos-7-${{github.sha}}
path: ./cpp/build/releases/
- uses: actions/upload-artifact@v3
with:
name: arrow-jars-centos-7-${{github.sha}}
path: /root/.m2/repository/org/apache/arrow/
run-tpc-test-ubuntu:
needs: build-native-lib-centos-7
strategy:
fail-fast: false
matrix:
os: [ "ubuntu:20.04", "ubuntu:22.04" ]
spark: [ "spark-3.2", "spark-3.3", "spark-3.4", "spark-3.5" ]
java: [ "java-8", "java-11", "java-17" ]
# Spark supports JDK17 since 3.3 and later, see https://issues.apache.org/jira/browse/SPARK-33772
exclude:
- spark: spark-3.2
java: java-17
- spark: spark-3.4
java: java-17
- spark: spark-3.5
java: java-17
- spark: spark-3.2
java: java-11
- spark: spark-3.3
java: java-11
- spark: spark-3.4
java: java-11
- os: ubuntu:22.04
java: java-17
- os: ubuntu:22.04
java: java-11
runs-on: ubuntu-20.04
container: ${{ matrix.os }}
steps:
- uses: actions/checkout@v2
- name: Download All Native Artifacts
uses: actions/download-artifact@v3
with:
name: velox-native-lib-centos-7-${{github.sha}}
path: ./cpp/build/releases/
- name: Download All Arrow Jar Artifacts
uses: actions/download-artifact@v3
with:
name: arrow-jars-centos-7-${{github.sha}}
path: /root/.m2/repository/org/apache/arrow/
- name: Setup tzdata
run: |
if [ "${{ matrix.os }}" = "ubuntu:22.04" ]; then
apt-get update
TZ="Etc/GMT" DEBIAN_FRONTEND=noninteractive apt-get install -y tzdata
fi
- name: Setup java and maven
run: |
if [ "${{ matrix.java }}" = "java-17" ]; then
apt-get update && apt-get install -y openjdk-17-jdk maven
apt remove openjdk-11* -y
elif [ "${{ matrix.java }}" = "java-11" ]; then
apt-get update && apt-get install -y openjdk-11-jdk maven
else
apt-get update && apt-get install -y openjdk-8-jdk maven
apt remove openjdk-11* -y
fi
ls -l /root/.m2/repository/org/apache/arrow/arrow-dataset/15.0.0-gluten/
- name: Build and run TPC-H / TPC-DS
run: |
cd $GITHUB_WORKSPACE/
export JAVA_HOME=/usr/lib/jvm/${{ matrix.java }}-openjdk-amd64
echo "JAVA_HOME: $JAVA_HOME"
$MVN_CMD clean install -P${{ matrix.spark }} -P${{ matrix.java }} -Pbackends-velox -DskipTests
cd $GITHUB_WORKSPACE/tools/gluten-it
$MVN_CMD clean install -P${{ matrix.spark }} -P${{ matrix.java }} \
&& GLUTEN_IT_JVM_ARGS=-Xmx5G sbin/gluten-it.sh queries-compare \
--local --preset=velox --benchmark-type=h --error-on-memleak --off-heap-size=10g -s=1.0 --threads=16 --iterations=1 \
&& GLUTEN_IT_JVM_ARGS=-Xmx5G sbin/gluten-it.sh queries-compare \
--local --preset=velox --benchmark-type=ds --error-on-memleak --off-heap-size=10g -s=1.0 --threads=16 --iterations=1
run-tpc-test-centos:
needs: build-native-lib-centos-7
strategy:
fail-fast: false
matrix:
os: [ "centos:7", "centos:8" ]
spark: [ "spark-3.2", "spark-3.3", "spark-3.4", "spark-3.5" ]
java: [ "java-8", "java-11", "java-17" ]
# Spark supports JDK17 since 3.3 and later, see https://issues.apache.org/jira/browse/SPARK-33772
exclude:
- spark: spark-3.2
java: java-17
- spark: spark-3.4
java: java-17
- spark: spark-3.5
java: java-17
- spark: spark-3.2
java: java-11
- spark: spark-3.3
java: java-11
- spark: spark-3.4
java: java-11
- os: centos:7
java: java-17
- os: centos:7
java: java-11
runs-on: ubuntu-20.04
container: ${{ matrix.os }}
steps:
- uses: actions/checkout@v2
- name: Download All Native Artifacts
uses: actions/download-artifact@v3
with:
name: velox-native-lib-centos-7-${{github.sha}}
path: ./cpp/build/releases/
- name: Download All Arrow Jar Artifacts
uses: actions/download-artifact@v3
with:
name: arrow-jars-centos-7-${{github.sha}}
path: /root/.m2/repository/org/apache/arrow/
- name: Update mirror list
run: |
if [ "${{ matrix.os }}" = "centos:7" ] || [ "${{ matrix.os }}" = "centos:8" ]; then
sed -i -e "s|mirrorlist=|#mirrorlist=|g" /etc/yum.repos.d/CentOS-* || true
sed -i -e "s|#baseurl=http://mirror.centos.org|baseurl=http://vault.centos.org|g" /etc/yum.repos.d/CentOS-* || true
fi
- name: Setup java and maven
run: |
if [ "${{ matrix.java }}" = "java-17" ]; then
yum update -y && yum install -y java-17-openjdk-devel wget
elif [ "${{ matrix.java }}" = "java-11" ]; then
yum update -y && yum install -y java-11-openjdk-devel wget
else
yum update -y && yum install -y java-1.8.0-openjdk-devel wget
fi
$SETUP install_maven
- name: Set environment variables
run: |
if [ "${{ matrix.java }}" = "java-17" ]; then
echo "JAVA_HOME=/usr/lib/jvm/java-17-openjdk" >> $GITHUB_ENV
elif [ "${{ matrix.java }}" = "java-11" ]; then
echo "JAVA_HOME=/usr/lib/jvm/java-11-openjdk" >> $GITHUB_ENV
else
echo "JAVA_HOME=/usr/lib/jvm/java-1.8.0-openjdk" >> $GITHUB_ENV
fi
- name: Build gluten-it
run: |
echo "JAVA_HOME: $JAVA_HOME"
cd $GITHUB_WORKSPACE/
$MVN_CMD clean install -P${{ matrix.spark }} -P${{ matrix.java }} -Pbackends-velox -DskipTests
cd $GITHUB_WORKSPACE/tools/gluten-it
$MVN_CMD clean install -P${{ matrix.spark }} -P${{ matrix.java }}
- name: Run TPC-H / TPC-DS
run: |
echo "JAVA_HOME: $JAVA_HOME"
cd $GITHUB_WORKSPACE/tools/gluten-it
GLUTEN_IT_JVM_ARGS=-Xmx5G sbin/gluten-it.sh queries-compare \
--local --preset=velox --benchmark-type=h --error-on-memleak --off-heap-size=10g -s=1.0 --threads=16 --iterations=1 \
&& GLUTEN_IT_JVM_ARGS=-Xmx5G sbin/gluten-it.sh queries-compare \
--local --preset=velox --benchmark-type=ds --error-on-memleak --off-heap-size=10g -s=1.0 --threads=16 --iterations=1
- name: Run TPC-H / TPC-DS with RAS
run: |
echo "JAVA_HOME: $JAVA_HOME"
cd $GITHUB_WORKSPACE/tools/gluten-it
GLUTEN_IT_JVM_ARGS=-Xmx5G sbin/gluten-it.sh queries-compare \
--local --preset=velox --benchmark-type=h --error-on-memleak --off-heap-size=10g -s=1.0 --threads=16 --iterations=1 \
--extra-conf=spark.gluten.ras.enabled=true \
&& GLUTEN_IT_JVM_ARGS=-Xmx5G sbin/gluten-it.sh queries-compare \
--local --preset=velox --benchmark-type=ds --error-on-memleak --off-heap-size=10g -s=1.0 --threads=16 --iterations=1 \
--extra-conf=spark.gluten.ras.enabled=true
run-tpc-test-ubuntu-oom:
needs: build-native-lib-centos-7
strategy:
fail-fast: false
matrix:
spark: [ "spark-3.2" ]
runs-on: ubuntu-20.04
steps:
- name: Maximize build disk space
shell: bash
run: |
df -h
set -euo pipefail
echo "Removing unwanted software... "
sudo rm -rf /usr/share/dotnet
sudo rm -rf /usr/local/lib/android
sudo rm -rf /opt/ghc
sudo rm -rf /opt/hostedtoolcache/CodeQL
sudo docker image prune --all --force > /dev/null
df -h
- uses: actions/checkout@v2
- name: Download All Native Artifacts
uses: actions/download-artifact@v3
with:
name: velox-native-lib-centos-7-${{github.sha}}
path: ./cpp/build/releases/
- name: Download All Arrow Jar Artifacts
uses: actions/download-artifact@v3
with:
name: arrow-jars-centos-7-${{github.sha}}
path: /home/runner/.m2/repository/org/apache/arrow/
- name: Setup java and maven
run: |
sudo apt-get update
sudo apt-get install -y openjdk-8-jdk maven
- name: Set environment variables
run: |
echo "JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64" >> $GITHUB_ENV
- name: Build for Spark ${{ matrix.spark }}
run: |
cd $GITHUB_WORKSPACE/
$MVN_CMD clean install -P${{ matrix.spark }} -Pbackends-velox -DskipTests
cd $GITHUB_WORKSPACE/tools/gluten-it
$MVN_CMD clean install -P${{ matrix.spark }}
GLUTEN_IT_JVM_ARGS=-Xmx6G sbin/gluten-it.sh data-gen-only --local --benchmark-type=ds -s=30.0 --threads=12
- name: TPC-DS SF30.0 Parquet local spark3.2 Q67/Q95 low memory, memory isolation off
run: |
cd tools/gluten-it \
&& GLUTEN_IT_JVM_ARGS=-Xmx3G sbin/gluten-it.sh parameterized \
--local --preset=velox --benchmark-type=ds --error-on-memleak --queries=q67,q95 -s=30.0 --threads=12 --shuffle-partitions=72 --iterations=1 \
--data-gen=skip -m=OffHeapExecutionMemory \
-d=ISOLATION:OFF,spark.gluten.memory.isolation=false \
-d=OFFHEAP_SIZE:6g,spark.memory.offHeap.size=6g \
-d=OFFHEAP_SIZE:4g,spark.memory.offHeap.size=4g \
-d=OVER_ACQUIRE:0.3,spark.gluten.memory.overAcquiredMemoryRatio=0.3 \
-d=OVER_ACQUIRE:0.5,spark.gluten.memory.overAcquiredMemoryRatio=0.5 \
--excluded-dims=OFFHEAP_SIZE:4g
- name: TPC-DS SF30.0 Parquet local spark3.2 Q67 low memory, memory isolation on
run: |
cd tools/gluten-it \
&& GLUTEN_IT_JVM_ARGS=-Xmx3G sbin/gluten-it.sh parameterized \
--local --preset=velox --benchmark-type=ds --error-on-memleak --queries=q67 -s=30.0 --threads=12 --shuffle-partitions=72 --iterations=1 \
--data-gen=skip -m=OffHeapExecutionMemory \
-d=ISOLATION:ON,spark.gluten.memory.isolation=true,spark.memory.storageFraction=0.1 \
-d=OFFHEAP_SIZE:6g,spark.memory.offHeap.size=6g \
-d=OFFHEAP_SIZE:4g,spark.memory.offHeap.size=4g \
-d=OVER_ACQUIRE:0.3,spark.gluten.memory.overAcquiredMemoryRatio=0.3 \
-d=OVER_ACQUIRE:0.5,spark.gluten.memory.overAcquiredMemoryRatio=0.5
- name: TPC-DS SF30.0 Parquet local spark3.2 Q95 low memory, memory isolation on
run: |
cd tools/gluten-it \
&& GLUTEN_IT_JVM_ARGS=-Xmx3G sbin/gluten-it.sh parameterized \
--local --preset=velox --benchmark-type=ds --error-on-memleak --queries=q95 -s=30.0 --threads=12 --shuffle-partitions=72 --iterations=1 \
--data-gen=skip -m=OffHeapExecutionMemory \
-d=ISOLATION:ON,spark.gluten.memory.isolation=true,spark.memory.storageFraction=0.1 \
-d=OFFHEAP_SIZE:6g,spark.memory.offHeap.size=6g \
-d=OFFHEAP_SIZE:4g,spark.memory.offHeap.size=4g \
-d=OVER_ACQUIRE:0.3,spark.gluten.memory.overAcquiredMemoryRatio=0.3 \
-d=OVER_ACQUIRE:0.5,spark.gluten.memory.overAcquiredMemoryRatio=0.5
- name: TPC-DS SF30.0 Parquet local spark3.2 Q23A/Q23B low memory
run: |
cd tools/gluten-it \
&& GLUTEN_IT_JVM_ARGS=-Xmx3G sbin/gluten-it.sh parameterized \
--local --preset=velox --benchmark-type=ds --error-on-memleak --queries=q23a,q23b -s=30.0 --threads=12 --shuffle-partitions=72 --iterations=1 \
--data-gen=skip -m=OffHeapExecutionMemory \
-d=ISOLATION:OFF,spark.gluten.memory.isolation=false \
-d=OFFHEAP_SIZE:2g,spark.memory.offHeap.size=2g \
-d=FLUSH_MODE:DISABLED,spark.gluten.sql.columnar.backend.velox.flushablePartialAggregation=false,spark.gluten.sql.columnar.backend.velox.maxPartialAggregationMemoryRatio=1.0,spark.gluten.sql.columnar.backend.velox.maxExtendedPartialAggregationMemoryRatio=1.0,spark.gluten.sql.columnar.backend.velox.abandonPartialAggregationMinPct=100,spark.gluten.sql.columnar.backend.velox.abandonPartialAggregationMinRows=0 \
-d=FLUSH_MODE:ABANDONED,spark.gluten.sql.columnar.backend.velox.maxPartialAggregationMemoryRatio=1.0,spark.gluten.sql.columnar.backend.velox.maxExtendedPartialAggregationMemoryRatio=1.0,spark.gluten.sql.columnar.backend.velox.abandonPartialAggregationMinPct=0,spark.gluten.sql.columnar.backend.velox.abandonPartialAggregationMinRows=0 \
-d=FLUSH_MODE:FLUSHED,spark.gluten.sql.columnar.backend.velox.maxPartialAggregationMemoryRatio=0.05,spark.gluten.sql.columnar.backend.velox.maxExtendedPartialAggregationMemoryRatio=0.1,spark.gluten.sql.columnar.backend.velox.abandonPartialAggregationMinPct=100,spark.gluten.sql.columnar.backend.velox.abandonPartialAggregationMinRows=0
- name: TPC-DS SF30.0 Parquet local spark3.2 Q23A/Q23B low memory, memory isolation on
run: |
cd tools/gluten-it \
&& GLUTEN_IT_JVM_ARGS=-Xmx3G sbin/gluten-it.sh parameterized \
--local --preset=velox --benchmark-type=ds --error-on-memleak --queries=q23a,q23b -s=30.0 --threads=12 --shuffle-partitions=72 --iterations=1 \
--data-gen=skip -m=OffHeapExecutionMemory \
-d=ISOLATION:ON,spark.gluten.memory.isolation=true,spark.memory.storageFraction=0.1 \
-d=OFFHEAP_SIZE:2g,spark.memory.offHeap.size=2g \
-d=FLUSH_MODE:DISABLED,spark.gluten.sql.columnar.backend.velox.flushablePartialAggregation=false,spark.gluten.sql.columnar.backend.velox.maxPartialAggregationMemoryRatio=1.0,spark.gluten.sql.columnar.backend.velox.maxExtendedPartialAggregationMemoryRatio=1.0,spark.gluten.sql.columnar.backend.velox.abandonPartialAggregationMinPct=100,spark.gluten.sql.columnar.backend.velox.abandonPartialAggregationMinRows=0 \
-d=FLUSH_MODE:ABANDONED,spark.gluten.sql.columnar.backend.velox.maxPartialAggregationMemoryRatio=1.0,spark.gluten.sql.columnar.backend.velox.maxExtendedPartialAggregationMemoryRatio=1.0,spark.gluten.sql.columnar.backend.velox.abandonPartialAggregationMinPct=0,spark.gluten.sql.columnar.backend.velox.abandonPartialAggregationMinRows=0 \
-d=FLUSH_MODE:FLUSHED,spark.gluten.sql.columnar.backend.velox.maxPartialAggregationMemoryRatio=0.05,spark.gluten.sql.columnar.backend.velox.maxExtendedPartialAggregationMemoryRatio=0.1,spark.gluten.sql.columnar.backend.velox.abandonPartialAggregationMinPct=100,spark.gluten.sql.columnar.backend.velox.abandonPartialAggregationMinRows=0
- name: TPC-DS SF30.0 Parquet local spark3.2 Q97 low memory
run: |
cd tools/gluten-it \
&& GLUTEN_IT_JVM_ARGS=-Xmx3G sbin/gluten-it.sh parameterized \
--local --preset=velox --benchmark-type=ds --error-on-memleak --queries=q97 -s=30.0 --threads=12 --shuffle-partitions=72 --iterations=1 \
--data-gen=skip -m=OffHeapExecutionMemory \
--extra-conf=spark.gluten.sql.columnar.backend.velox.IOThreads=0 \
-d=ISOLATION:OFF,spark.gluten.memory.isolation=false \
-d=ISOLATION:ON,spark.gluten.memory.isolation=true,spark.memory.storageFraction=0.1 \
-d=OFFHEAP_SIZE:2g,spark.memory.offHeap.size=2g \
-d=OFFHEAP_SIZE:1g,spark.memory.offHeap.size=1g \
-d=IO_THREADS:12,spark.gluten.sql.columnar.backend.velox.IOThreads=12 \
-d=IO_THREADS:0,spark.gluten.sql.columnar.backend.velox.IOThreads=0
run-tpc-test-ubuntu-randomkill:
needs: build-native-lib-centos-7
strategy:
fail-fast: false
matrix:
spark: [ "spark-3.2" ]
runs-on: ubuntu-20.04
steps:
- name: Maximize build disk space
shell: bash
run: |
df -h
set -euo pipefail
echo "Removing unwanted software... "
sudo rm -rf /usr/share/dotnet
sudo rm -rf /usr/local/lib/android
sudo rm -rf /opt/ghc
sudo rm -rf /opt/hostedtoolcache/CodeQL
sudo docker image prune --all --force > /dev/null
df -h
- uses: actions/checkout@v2
- name: Download All Native Artifacts
uses: actions/download-artifact@v3
with:
name: velox-native-lib-centos-7-${{github.sha}}
path: ./cpp/build/releases/
- name: Download All Arrow Jar Artifacts
uses: actions/download-artifact@v3
with:
name: arrow-jars-centos-7-${{github.sha}}
path: /home/runner/.m2/repository/org/apache/arrow/
- name: Setup java and maven
run: |
sudo apt-get update
sudo apt-get install -y openjdk-8-jdk maven
- name: Set environment variables
run: |
echo "JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64" >> $GITHUB_ENV
- name: Build for Spark ${{ matrix.spark }}
run: |
cd $GITHUB_WORKSPACE/
$MVN_CMD clean install -P${{ matrix.spark }} -Pbackends-velox -DskipTests
cd $GITHUB_WORKSPACE/tools/gluten-it
$MVN_CMD clean install -P${{ matrix.spark }}
GLUTEN_IT_JVM_ARGS=-Xmx6G sbin/gluten-it.sh data-gen-only --local --benchmark-type=ds -s=30.0 --threads=12
- name: TPC-DS SF30.0 Parquet local spark3.2 random kill tasks
run: |
cd tools/gluten-it \
&& GLUTEN_IT_JVM_ARGS=-Xmx6G sbin/gluten-it.sh queries \
--local --preset=velox --benchmark-type=ds --error-on-memleak -s=30.0 --off-heap-size=8g --threads=12 --shuffle-partitions=72 --iterations=1 \
--data-gen=skip --random-kill-tasks --no-session-reuse
run-tpc-test-centos8-uniffle:
needs: build-native-lib-centos-7
strategy:
fail-fast: false
matrix:
spark: [ "spark-3.2" ]
runs-on: ubuntu-20.04
container: centos:8
steps:
- uses: actions/checkout@v2
- name: Download All Native Artifacts
uses: actions/download-artifact@v3
with:
name: velox-native-lib-centos-7-${{github.sha}}
path: ./cpp/build/releases/
- name: Download All Arrow Jar Artifacts
uses: actions/download-artifact@v3
with:
name: arrow-jars-centos-7-${{github.sha}}
path: /root/.m2/repository/org/apache/arrow/
- name: Update mirror list
run: |
sed -i -e "s|mirrorlist=|#mirrorlist=|g" /etc/yum.repos.d/CentOS-* || true
sed -i -e "s|#baseurl=http://mirror.centos.org|baseurl=http://vault.centos.org|g" /etc/yum.repos.d/CentOS-* || true
- name: Setup java and maven
run: |
yum update -y && yum install -y java-1.8.0-openjdk-devel wget git
$SETUP install_maven
- name: Build for Uniffle 0.9.0
run: |
export JAVA_HOME=/usr/lib/jvm/java-1.8.0-openjdk && \
cd /opt && \
git clone -b v0.9.0 https://github.com/apache/incubator-uniffle.git && \
cd incubator-uniffle && \
$MVN_CMD clean install -Phadoop2.8,spark3 -DskipTests
cd /opt && \
${WGET_CMD} https://archive.apache.org/dist/incubator/uniffle/0.9.0/apache-uniffle-0.9.0-incubating-bin.tar.gz && \
tar xzf apache-uniffle-0.9.0-incubating-bin.tar.gz -C /opt/ && mv /opt/rss-0.9.0-hadoop2.8 /opt/uniffle && \
${WGET_CMD} https://archive.apache.org/dist/hadoop/common/hadoop-2.8.5/hadoop-2.8.5.tar.gz && \
tar xzf hadoop-2.8.5.tar.gz -C /opt/
rm -rf /opt/incubator-uniffle
cd /opt/uniffle && mkdir shuffle_data && \
bash -c "echo -e 'XMX_SIZE=16g\nHADOOP_HOME=/opt/hadoop-2.8.5' > ./bin/rss-env.sh" && \
bash -c "echo -e 'rss.coordinator.shuffle.nodes.max 1\nrss.rpc.server.port 19999' > ./conf/coordinator.conf" && \
bash -c "echo -e 'rss.server.app.expired.withoutHeartbeat 7200000\nrss.server.heartbeat.delay 3000\nrss.rpc.server.port 19997\nrss.rpc.server.type GRPC_NETTY\nrss.jetty.http.port 19996\nrss.server.netty.port 19995\nrss.storage.basePath /opt/uniffle/shuffle_data\nrss.storage.type MEMORY_LOCALFILE\nrss.coordinator.quorum localhost:19999\nrss.server.flush.thread.alive 10\nrss.server.single.buffer.flush.threshold 64m' > ./conf/server.conf" && \
bash ./bin/start-coordinator.sh && bash ./bin/start-shuffle-server.sh
- name: Build for Spark ${{ matrix.spark }}
run: |
cd $GITHUB_WORKSPACE/ && \
$MVN_CMD clean install -P${{ matrix.spark }} -Pbackends-velox -Puniffle -DskipTests
- name: TPC-H SF1.0 && TPC-DS SF1.0 Parquet local spark3.2 with uniffle-0.9.0
run: |
export JAVA_HOME=/usr/lib/jvm/java-1.8.0-openjdk && \
cd $GITHUB_WORKSPACE/tools/gluten-it && \
$MVN_CMD clean install -Pspark-3.2 -Puniffle && \
GLUTEN_IT_JVM_ARGS=-Xmx5G sbin/gluten-it.sh queries-compare \
--local --preset=velox-with-uniffle --benchmark-type=h --error-on-memleak --off-heap-size=10g -s=1.0 --threads=16 --iterations=1
run-tpc-test-ubuntu-2204-celeborn:
needs: build-native-lib-centos-7
strategy:
fail-fast: false
matrix:
spark: [ "spark-3.2" ]
celeborn: [ "celeborn-0.5.1", "celeborn-0.4.2", "celeborn-0.3.2-incubating" ]
runs-on: ubuntu-20.04
container: ubuntu:22.04
steps:
- uses: actions/checkout@v2
- name: Download All Native Artifacts
uses: actions/download-artifact@v3
with:
name: velox-native-lib-centos-7-${{github.sha}}
path: ./cpp/build/releases/
- name: Download All Arrow Jar Artifacts
uses: actions/download-artifact@v3
with:
name: arrow-jars-centos-7-${{github.sha}}
path: /root/.m2/repository/org/apache/arrow/
- name: Setup tzdata
run: |
apt-get update
TZ="Etc/GMT" DEBIAN_FRONTEND=noninteractive apt-get install -y tzdata
- name: Setup java and maven
run: |
apt-get update && apt-get install -y openjdk-8-jdk maven wget
apt remove openjdk-11* -y
echo "JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64" >> $GITHUB_ENV
- name: Build for Spark ${{ matrix.spark }}
run: |
cd $GITHUB_WORKSPACE/
$MVN_CMD clean install -P${{ matrix.spark }} -Pbackends-velox -Pceleborn -DskipTests
- name: TPC-H SF1.0 && TPC-DS SF1.0 Parquet local spark3.2 with ${{ matrix.celeborn }}
run: |
EXTRA_PROFILE=""
if [ "${{ matrix.celeborn }}" = "celeborn-0.4.2" ]; then
EXTRA_PROFILE="-Pceleborn-0.4"
elif [ "${{ matrix.celeborn }}" = "celeborn-0.5.1" ]; then
EXTRA_PROFILE="-Pceleborn-0.5"
fi
echo "EXTRA_PROFILE: ${EXTRA_PROFILE}"
cd /opt && mkdir -p celeborn && \
${WGET_CMD} https://archive.apache.org/dist/celeborn/${{ matrix.celeborn }}/apache-${{ matrix.celeborn }}-bin.tgz && \
tar xzf apache-${{ matrix.celeborn }}-bin.tgz -C /opt/celeborn --strip-components=1 && cd celeborn && \
mv ./conf/celeborn-env.sh.template ./conf/celeborn-env.sh && \
bash -c "echo -e 'CELEBORN_MASTER_MEMORY=4g\nCELEBORN_WORKER_MEMORY=4g\nCELEBORN_WORKER_OFFHEAP_MEMORY=8g' > ./conf/celeborn-env.sh" && \
bash -c "echo -e 'celeborn.worker.commitFiles.threads 128\nceleborn.worker.sortPartition.threads 64' > ./conf/celeborn-defaults.conf" && \
bash ./sbin/start-master.sh && bash ./sbin/start-worker.sh && \
cd $GITHUB_WORKSPACE/tools/gluten-it && $MVN_CMD clean install -Pspark-3.2 -Pceleborn ${EXTRA_PROFILE} && \
GLUTEN_IT_JVM_ARGS=-Xmx5G sbin/gluten-it.sh queries-compare \
--local --preset=velox-with-celeborn --benchmark-type=h --error-on-memleak --off-heap-size=10g -s=1.0 --threads=8 --iterations=1 && \
GLUTEN_IT_JVM_ARGS=-Xmx5G sbin/gluten-it.sh queries-compare \
--local --preset=velox-with-celeborn --benchmark-type=ds --error-on-memleak --off-heap-size=10g -s=1.0 --threads=8 --iterations=1
run-spark-test-spark32:
needs: build-native-lib-centos-7
runs-on: ubuntu-20.04
container: apache/gluten:centos-8
steps:
- uses: actions/checkout@v2
- uses: actions/download-artifact@v3
with:
name: velox-native-lib-centos-7-${{github.sha}}
path: ./cpp/build/releases
- uses: actions/download-artifact@v3
with:
name: arrow-jars-centos-7-${{github.sha}}
path: /root/.m2/repository/org/apache/arrow/
- name: Prepare spark.test.home for Spark 3.2.2 (other tests)
run: |
bash .github/workflows/util/install_spark_resources.sh 3.2
dnf module -y install python39 && \
alternatives --set python3 /usr/bin/python3.9 && \
pip3 install setuptools && \
pip3 install pyspark==3.2.2 cython && \
pip3 install pandas pyarrow
- name: Build and run unit test for Spark 3.2.2 (other tests)
run: |
cd $GITHUB_WORKSPACE/
export SPARK_SCALA_VERSION=2.12
$MVN_CMD clean test -Pspark-3.2 -Pspark-ut -Pbackends-velox -Pceleborn -Piceberg \
-Pdelta -Phudi -DargLine="-Dspark.test.home=$GITHUB_WORKSPACE//shims/spark32/spark_home/" \
-DtagsToExclude=org.apache.spark.tags.ExtendedSQLTest,org.apache.gluten.tags.UDFTest,org.apache.gluten.tags.SkipTestTags
- name: Upload test report
if: always()
uses: actions/upload-artifact@v4
with:
name: test-report-spark32
path: '**/surefire-reports/TEST-*.xml'
- name: Upload golden files
if: failure()
uses: actions/upload-artifact@v4
with:
name: golden-files-spark32
path: /tmp/tpch-approved-plan/**
run-spark-test-spark32-slow:
needs: build-native-lib-centos-7
runs-on: ubuntu-20.04
container: apache/gluten:centos-8
steps:
- uses: actions/checkout@v2
- name: Download All Artifacts
uses: actions/download-artifact@v3
with:
name: velox-native-lib-centos-7-${{github.sha}}
path: ./cpp/build/releases
- name: Download Arrow Jars
uses: actions/download-artifact@v3
with:
name: arrow-jars-centos-7-${{github.sha}}
path: /root/.m2/repository/org/apache/arrow/
- name: Prepare spark.test.home for Spark 3.2.2 (slow tests)
run: |
bash .github/workflows/util/install_spark_resources.sh 3.2
- name: Build and run unit test for Spark 3.2.2 (slow tests)
run: |
cd $GITHUB_WORKSPACE/
$MVN_CMD clean test -Pspark-3.2 -Pspark-ut -Pbackends-velox -Pceleborn -Piceberg -Pdelta -Phudi \
-DargLine="-Dspark.test.home=$GITHUB_WORKSPACE//shims/spark32/spark_home/" -DtagsToInclude=org.apache.spark.tags.ExtendedSQLTest
- name: Upload test report
if: always()
uses: actions/upload-artifact@v4
with:
name: test-report-spark32-slow
path: '**/surefire-reports/TEST-*.xml'
run-spark-test-spark33:
needs: build-native-lib-centos-7
runs-on: ubuntu-20.04
container: apache/gluten:centos-8
steps:
- uses: actions/checkout@v2
- name: Download All Artifacts
uses: actions/download-artifact@v3
with:
name: velox-native-lib-centos-7-${{github.sha}}
path: ./cpp/build/releases
- name: Download Arrow Jars
uses: actions/download-artifact@v3
with:
name: arrow-jars-centos-7-${{github.sha}}
path: /root/.m2/repository/org/apache/arrow/
- name: Prepare spark.test.home for Spark 3.3.1 (other tests)
run: |
bash .github/workflows/util/install_spark_resources.sh 3.3
dnf module -y install python39 && \
alternatives --set python3 /usr/bin/python3.9 && \
pip3 install setuptools && \
pip3 install pyspark==3.3.1 cython && \
pip3 install pandas pyarrow
- name: Build and Run unit test for Spark 3.3.1 (other tests)
run: |
cd $GITHUB_WORKSPACE/
export SPARK_SCALA_VERSION=2.12
$MVN_CMD clean test -Pspark-3.3 -Pbackends-velox -Pceleborn -Piceberg -Pdelta -Phudi -Pspark-ut \
-DargLine="-Dspark.test.home=$GITHUB_WORKSPACE//shims/spark33/spark_home/" \
-DtagsToExclude=org.apache.spark.tags.ExtendedSQLTest,org.apache.gluten.tags.UDFTest,org.apache.gluten.tags.SkipTestTags
- name: Upload test report
if: always()
uses: actions/upload-artifact@v4
with:
name: test-report-spark33
path: '**/surefire-reports/TEST-*.xml'
- name: Upload golden files
if: failure()
uses: actions/upload-artifact@v4
with:
name: golden-files-spark33
path: /tmp/tpch-approved-plan/**
run-spark-test-spark33-slow:
needs: build-native-lib-centos-7
runs-on: ubuntu-20.04
container: apache/gluten:centos-8
steps:
- uses: actions/checkout@v2
- name: Download All Artifacts
uses: actions/download-artifact@v3
with:
name: velox-native-lib-centos-7-${{github.sha}}
path: ./cpp/build/releases
- name: Download Arrow Jars
uses: actions/download-artifact@v3
with:
name: arrow-jars-centos-7-${{github.sha}}
path: /root/.m2/repository/org/apache/arrow/
- name: Prepare spark.test.home for Spark 3.3.1 (slow tests)
run: |
bash .github/workflows/util/install_spark_resources.sh 3.3
- name: Build and Run unit test for Spark 3.3.1 (slow tests)
run: |
cd $GITHUB_WORKSPACE/
$MVN_CMD clean test -Pspark-3.3 -Pbackends-velox -Pceleborn -Piceberg -Pdelta -Phudi -Pspark-ut \
-DargLine="-Dspark.test.home=$GITHUB_WORKSPACE//shims/spark33/spark_home/" \
-DtagsToInclude=org.apache.spark.tags.ExtendedSQLTest
- name: Upload test report
if: always()
uses: actions/upload-artifact@v4
with:
name: test-report-spark33-slow
path: '**/surefire-reports/TEST-*.xml'
run-spark-test-spark34:
needs: build-native-lib-centos-7
runs-on: ubuntu-20.04
container: apache/gluten:centos-8
steps:
- uses: actions/checkout@v2
- name: Download All Artifacts
uses: actions/download-artifact@v3
with:
name: velox-native-lib-centos-7-${{github.sha}}
path: ./cpp/build/releases
- name: Download Arrow Jars
uses: actions/download-artifact@v3
with:
name: arrow-jars-centos-7-${{github.sha}}
path: /root/.m2/repository/org/apache/arrow/
- name: Prepare spark.test.home for Spark 3.4.3 (other tests)
run: |
bash .github/workflows/util/install_spark_resources.sh 3.4
dnf module -y install python39 && \
alternatives --set python3 /usr/bin/python3.9 && \
pip3 install setuptools && \
pip3 install pyspark==3.4.3 cython && \
pip3 install pandas pyarrow
- name: Build and Run unit test for Spark 3.4.3 (other tests)
run: |
cd $GITHUB_WORKSPACE/
export SPARK_SCALA_VERSION=2.12
$MVN_CMD clean test -Pspark-3.4 -Pbackends-velox -Pceleborn -Piceberg -Pdelta -Phudi -Pspark-ut \
-DargLine="-Dspark.test.home=$GITHUB_WORKSPACE//shims/spark34/spark_home/" \
-DtagsToExclude=org.apache.spark.tags.ExtendedSQLTest,org.apache.gluten.tags.UDFTest,org.apache.gluten.tags.SkipTestTags
- name: Upload test report
if: always()
uses: actions/upload-artifact@v4
with:
name: test-report-spark34
path: '**/surefire-reports/TEST-*.xml'
- name: Upload golden files
if: failure()
uses: actions/upload-artifact@v4
with:
name: golden-files-spark34
path: /tmp/tpch-approved-plan/**
run-spark-test-spark34-slow:
needs: build-native-lib-centos-7
runs-on: ubuntu-20.04
container: apache/gluten:centos-8
steps:
- uses: actions/checkout@v2
- name: Download All Artifacts
uses: actions/download-artifact@v3
with:
name: velox-native-lib-centos-7-${{github.sha}}
path: ./cpp/build/releases
- name: Download Arrow Jars
uses: actions/download-artifact@v3
with:
name: arrow-jars-centos-7-${{github.sha}}
path: /root/.m2/repository/org/apache/arrow/
- name: Prepare spark.test.home for Spark 3.4.3 (slow tests)
run: |
bash .github/workflows/util/install_spark_resources.sh 3.4
- name: Build and Run unit test for Spark 3.4.3 (slow tests)
run: |
cd $GITHUB_WORKSPACE/
$MVN_CMD clean test -Pspark-3.4 -Pbackends-velox -Pceleborn -Piceberg -Pdelta -Pspark-ut -Phudi \
-DargLine="-Dspark.test.home=$GITHUB_WORKSPACE//shims/spark34/spark_home/" \
-DtagsToInclude=org.apache.spark.tags.ExtendedSQLTest
- name: Upload test report
if: always()
uses: actions/upload-artifact@v4
with:
name: test-report-spark34-slow
path: '**/surefire-reports/TEST-*.xml'
run-spark-test-spark35:
needs: build-native-lib-centos-7
runs-on: ubuntu-20.04
container: apache/gluten:centos-8
steps:
- uses: actions/checkout@v2
- name: Download All Artifacts
uses: actions/download-artifact@v3
with:
name: velox-native-lib-centos-7-${{github.sha}}
path: ./cpp/build/releases
- name: Download Arrow Jars
uses: actions/download-artifact@v3
with:
name: arrow-jars-centos-7-${{github.sha}}
path: /root/.m2/repository/org/apache/arrow/
- name: Prepare spark.test.home for Spark 3.5.2 (other tests)
run: |
bash .github/workflows/util/install_spark_resources.sh 3.5
dnf module -y install python39 && \
alternatives --set python3 /usr/bin/python3.9 && \
pip3 install setuptools && \
pip3 install pyspark==3.5.2 cython && \
pip3 install pandas pyarrow
- name: Build and Run unit test for Spark 3.5.2 (other tests)
run: |
cd $GITHUB_WORKSPACE/
export SPARK_SCALA_VERSION=2.12
$MVN_CMD clean test -Pspark-3.5 -Pbackends-velox -Pceleborn -Piceberg -Pdelta -Phudi -Pspark-ut \
-DargLine="-Dspark.test.home=$GITHUB_WORKSPACE//shims/spark35/spark_home/" \
-DtagsToExclude=org.apache.spark.tags.ExtendedSQLTest,org.apache.gluten.tags.UDFTest,org.apache.gluten.tags.SkipTestTags
- name: Upload test report
if: always()
uses: actions/upload-artifact@v4
with:
name: test-report-spark35
path: '**/surefire-reports/TEST-*.xml'
- name: Upload golden files
if: failure()
uses: actions/upload-artifact@v4
with:
name: golden-files-spark35
path: /tmp/tpch-approved-plan/**
run-spark-test-spark35-scala213:
needs: build-native-lib-centos-7
runs-on: ubuntu-20.04
container: apache/gluten:centos-8
steps:
- uses: actions/checkout@v2
- name: Download All Artifacts
uses: actions/download-artifact@v3
with:
name: velox-native-lib-centos-7-${{github.sha}}
path: ./cpp/build/releases
- name: Download Arrow Jars
uses: actions/download-artifact@v3
with:
name: arrow-jars-centos-7-${{github.sha}}
path: /root/.m2/repository/org/apache/arrow/
- name: Prepare spark.test.home for Spark 3.5.2 (other tests)
run: |
bash .github/workflows/util/install_spark_resources.sh 3.5-scala2.13
dnf module -y install python39 && \
alternatives --set python3 /usr/bin/python3.9 && \
pip3 install setuptools && \
pip3 install pyspark==3.5.2 cython && \
pip3 install pandas pyarrow
- name: Build and Run unit test for Spark 3.5.2 with scala-2.13 (other tests)
run: |
cd $GITHUB_WORKSPACE/
export SPARK_SCALA_VERSION=2.13
$MVN_CMD clean test -Pspark-3.5 -Pscala-2.13 -Pbackends-velox -Pceleborn -Piceberg \
-Pdelta -Pspark-ut -DargLine="-Dspark.test.home=$GITHUB_WORKSPACE//shims/spark35/spark_home/" \
-DtagsToExclude=org.apache.spark.tags.ExtendedSQLTest,org.apache.gluten.tags.UDFTest,org.apache.gluten.tags.SkipTestTags
- name: Upload test report
if: always()
uses: actions/upload-artifact@v4
with:
name: test-report-spark35-scala213
path: '**/surefire-reports/TEST-*.xml'
run-spark-test-spark35-slow:
needs: build-native-lib-centos-7
runs-on: ubuntu-20.04
container: apache/gluten:centos-8
steps:
- uses: actions/checkout@v2
- name: Download All Artifacts
uses: actions/download-artifact@v3
with:
name: velox-native-lib-centos-7-${{github.sha}}
path: ./cpp/build/releases
- name: Download Arrow Jars
uses: actions/download-artifact@v3
with:
name: arrow-jars-centos-7-${{github.sha}}
path: /root/.m2/repository/org/apache/arrow/
- name: Prepare spark.test.home for Spark 3.5.2 (slow tests)
run: |
bash .github/workflows/util/install_spark_resources.sh 3.5
- name: Build and Run unit test for Spark 3.5.2 (slow tests)
run: |
cd $GITHUB_WORKSPACE/
$MVN_CMD clean test -Pspark-3.5 -Pbackends-velox -Pceleborn -Piceberg -Pdelta -Phudi -Pspark-ut \
-DargLine="-Dspark.test.home=$GITHUB_WORKSPACE//shims/spark35/spark_home/" \
-DtagsToInclude=org.apache.spark.tags.ExtendedSQLTest
- name: Upload test report
if: always()
uses: actions/upload-artifact@v4
with:
name: test-report-spark35-slow
path: '**/surefire-reports/TEST-*.xml'
run-spark-test-spark35-ras:
needs: build-native-lib-centos-7
runs-on: ubuntu-20.04
container: apache/gluten:centos-8
steps:
- uses: actions/checkout@v2
- name: Download All Artifacts
uses: actions/download-artifact@v3
with:
name: velox-native-lib-centos-7-${{github.sha}}
path: ./cpp/build/releases
- name: Download Arrow Jars
uses: actions/download-artifact@v3
with:
name: arrow-jars-centos-7-${{github.sha}}
path: /root/.m2/repository/org/apache/arrow/
- name: Prepare spark.test.home for Spark 3.5.2 (other tests)
run: |
bash .github/workflows/util/install_spark_resources.sh 3.5
dnf module -y install python39 && \
alternatives --set python3 /usr/bin/python3.9 && \
pip3 install setuptools && \
pip3 install pyspark==3.5.2 cython && \
pip3 install pandas pyarrow
- name: Build and Run unit test for Spark 3.5.2 (other tests)
run: |
cd $GITHUB_WORKSPACE/
export SPARK_SCALA_VERSION=2.12
$MVN_CMD clean test -Pspark-3.5 -Pbackends-velox -Pceleborn -Piceberg -Pdelta -Pspark-ut \
-DargLine="-Dspark.test.home=$GITHUB_WORKSPACE//shims/spark35/spark_home/ -Dspark.gluten.ras.enabled=true" \
-DtagsToExclude=org.apache.spark.tags.ExtendedSQLTest,org.apache.gluten.tags.UDFTest,org.apache.gluten.tags.SkipTestTags
- name: Upload test report
uses: actions/upload-artifact@v4
with:
name: test-report-spark35-ras
path: '**/surefire-reports/TEST-*.xml'
run-spark-test-spark35-slow-ras:
needs: build-native-lib-centos-7
runs-on: ubuntu-20.04
container: apache/gluten:centos-8
steps:
- uses: actions/checkout@v2
- name: Download All Artifacts
uses: actions/download-artifact@v3
with:
name: velox-native-lib-centos-7-${{github.sha}}
path: ./cpp/build/releases
- name: Download Arrow Jars
uses: actions/download-artifact@v3
with:
name: arrow-jars-centos-7-${{github.sha}}
path: /root/.m2/repository/org/apache/arrow/
- name: Prepare spark.test.home for Spark 3.5.2 (slow tests)
run: |
bash .github/workflows/util/install_spark_resources.sh 3.5
- name: Build and Run unit test for Spark 3.5.2 (slow tests)
run: |
cd $GITHUB_WORKSPACE/
$MVN_CMD clean test -Pspark-3.5 -Pbackends-velox -Pceleborn -Piceberg -Pdelta -Pspark-ut \
-DargLine="-Dspark.test.home=$GITHUB_WORKSPACE//shims/spark35/spark_home/ -Dspark.gluten.ras.enabled=true" \
-DtagsToInclude=org.apache.spark.tags.ExtendedSQLTest
- name: Upload test report
uses: actions/upload-artifact@v4
with:
name: test-report-spark35-slow-ras
path: '**/surefire-reports/TEST-*.xml'
run-cpp-test-udf-test:
runs-on: ubuntu-20.04
container: apache/gluten:centos-8
steps:
- uses: actions/checkout@v2
- name: Get Ccache
uses: actions/cache/restore@v3
with:
path: '${{ env.CCACHE_DIR }}'
key: ccache-centos8-release-default-${{github.sha}}
restore-keys: |
ccache-centos8-release-default
- name: Build Gluten native libraries
run: |
df -a
bash dev/ci-velox-buildshared-centos-8.sh
ccache -s
- name: "Save ccache"
uses: actions/cache/save@v3
id: ccache
with:
path: '${{ env.CCACHE_DIR }}'
key: ccache-centos8-release-default-${{github.sha}}
- name: Run CPP unit test
run: |
cd ./cpp/build && ctest -V
- name: Run CPP benchmark test
run: |
$MVN_CMD test -Pspark-3.5 -Pbackends-velox -pl backends-velox -am \
-DtagsToInclude="org.apache.gluten.tags.GenerateExample" -Dtest=none -DfailIfNoTests=false -Dexec.skip
# This test depends on example.json generated by the above mvn test.
cd cpp/build/velox/benchmarks && sudo chmod +x ./generic_benchmark
./generic_benchmark --run-example --with-shuffle --threads 1 --iterations 1
- name: Run UDF test
run: |
# Depends on --build_example=ON.
$MVN_CMD test -Pspark-3.5 -Pbackends-velox -Piceberg -Pdelta -DtagsToExclude=None \
-DtagsToInclude=org.apache.gluten.tags.UDFTest