From 144fc45219c1ac0744e1101f52725eca985edefe Mon Sep 17 00:00:00 2001 From: Yuan Date: Wed, 6 Dec 2023 11:22:52 +0800 Subject: [PATCH] [VL][CI] update docker build script (#3904) this patch updates the docker build script for ubuntu/centos. do not run setup scripts on centos 7 for static build, which is not necessary. Signed-off-by: Yuan Zhou --- .github/workflows/velox_be.yml | 2 +- ep/build-velox/src/get_velox.sh | 2 + tools/gluten-te/centos/build.sh | 99 ++++++++++++++++++++ tools/gluten-te/centos/centos-7-deps.sh | 1 + tools/gluten-te/centos/dockerfile-build | 101 +++++++++++++++++++++ tools/gluten-te/ubuntu/dockerfile-build | 7 +- tools/gluten-te/ubuntu/dockerfile-buildenv | 4 + 7 files changed, 213 insertions(+), 3 deletions(-) create mode 100755 tools/gluten-te/centos/build.sh create mode 100644 tools/gluten-te/centos/dockerfile-build diff --git a/.github/workflows/velox_be.yml b/.github/workflows/velox_be.yml index 29377aa5ff2f..fc21673eb45b 100644 --- a/.github/workflows/velox_be.yml +++ b/.github/workflows/velox_be.yml @@ -488,7 +488,7 @@ jobs: cd /opt/gluten && \ sudo -E ./dev/vcpkg/setup-build-depends.sh && \ source ./dev/vcpkg/env.sh && \ - ./dev/builddeps-veloxbe.sh --build_tests=ON --build_benchmarks=ON --enable_s3=ON --enable_gcs=ON --enable_hdfs=ON' + ./dev/builddeps-veloxbe.sh --run_setup_script=OFF --build_tests=ON --build_benchmarks=ON --enable_s3=ON --enable_gcs=ON --enable_hdfs=ON' - name: Build for Spark 3.2.2 run: | docker exec static-build-test-$GITHUB_RUN_ID bash -c ' diff --git a/ep/build-velox/src/get_velox.sh b/ep/build-velox/src/get_velox.sh index b700fc17b0dd..e430dd7c1bb9 100755 --- a/ep/build-velox/src/get_velox.sh +++ b/ep/build-velox/src/get_velox.sh @@ -109,7 +109,9 @@ function process_setup_centos8 { sed -i '/^function dnf_install/i\DEPENDENCY_DIR=${DEPENDENCY_DIR:-$(pwd)}' scripts/setup-centos8.sh sed -i '/^dnf_install autoconf/a\dnf_install libxml2-devel libgsasl-devel libuuid-devel' scripts/setup-centos8.sh sed -i 
'/^function cmake_install_deps.*/i FB_OS_VERSION=v2022.11.14.00\n function install_folly {\n github_checkout facebook/folly "${FB_OS_VERSION}"\n cmake_install -DBUILD_TESTS=OFF -DFOLLY_HAVE_INT128_T=ON\n}\n' scripts/setup-centos8.sh + sed -i '/^function cmake_install_deps.*/i function install_openssl {\n wget_and_untar https://github.com/openssl/openssl/archive/refs/tags/OpenSSL_1_1_1s.tar.gz openssl \n cd openssl \n ./config no-shared && make depend && make && sudo make install \n}\n' scripts/setup-centos8.sh sed -i '/^cmake_install_deps fmt/a \install_folly' scripts/setup-centos8.sh + sed -i '/^cmake_install_deps fmt/a \install_openssl' scripts/setup-centos8.sh if [ $ENABLE_HDFS == "ON" ]; then sed -i '/^function cmake_install_deps.*/i function install_libhdfs3 {\n cd "\${DEPENDENCY_DIR}"\n github_checkout oap-project/libhdfs3 master \n cmake_install\n}\n' scripts/setup-centos8.sh diff --git a/tools/gluten-te/centos/build.sh b/tools/gluten-te/centos/build.sh new file mode 100755 index 000000000000..0dfbae086a59 --- /dev/null +++ b/tools/gluten-te/centos/build.sh @@ -0,0 +1,99 @@ +#!/bin/bash +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+set -ex
+
+BASEDIR=$(dirname "$0")
+
+source "$BASEDIR/buildenv.sh"
+
+## Debug build flags
+
+# Create debug build (ON/OFF); defaults to DEFAULT_DEBUG_BUILD (presumably set by buildenv.sh)
+DEBUG_BUILD=${DEBUG_BUILD:-$DEFAULT_DEBUG_BUILD}
+# Warn if derived flags were pre-set. Keep the quotes: an unquoted empty value turns the test into [ -n ], always true.
+if [ -n "$JDK_DEBUG_BUILD" ]
+then
+  echo "Do not set JDK_DEBUG_BUILD manually!"
+fi
+
+if [ -n "$GLUTEN_DEBUG_BUILD" ]
+then
+  echo "Do not set GLUTEN_DEBUG_BUILD manually!"
+fi
+# JDK debug build stays OFF in both branches; only the Gluten debug flag follows DEBUG_BUILD.
+if [ "$DEBUG_BUILD" == "ON" ]
+then
+  JDK_DEBUG_BUILD=OFF
+  GLUTEN_DEBUG_BUILD=ON
+else
+  JDK_DEBUG_BUILD=OFF
+  GLUTEN_DEBUG_BUILD=OFF
+fi
+
+# The target branches
+TARGET_GLUTEN_REPO=${TARGET_GLUTEN_REPO:-$DEFAULT_GLUTEN_REPO}
+TARGET_GLUTEN_BRANCH=${TARGET_GLUTEN_BRANCH:-$DEFAULT_GLUTEN_BRANCH}
+
+# The branches used to prepare dependencies
+CACHE_GLUTEN_REPO=${CACHE_GLUTEN_REPO:-$DEFAULT_GLUTEN_REPO}
+CACHE_GLUTEN_BRANCH=${CACHE_GLUTEN_BRANCH:-$DEFAULT_GLUTEN_BRANCH}
+
+# Backend type
+BUILD_BACKEND_TYPE=${BUILD_BACKEND_TYPE:-$DEFAULT_BUILD_BACKEND_TYPE}
+
+# Build will result in this image
+DOCKER_TARGET_IMAGE_BUILD=${DOCKER_TARGET_IMAGE_BUILD:-$DEFAULT_DOCKER_TARGET_IMAGE_BUILD}
+
+DOCKER_TARGET_IMAGE_BUILD_WITH_OS_IMAGE="$DOCKER_TARGET_IMAGE_BUILD-$OS_IMAGE"
+
+## Fetch target commit
+# Resolve the target branch to a commit hash: first column of the `git ls-remote` output.
+TARGET_GLUTEN_COMMIT="$(git ls-remote "$TARGET_GLUTEN_REPO" "$TARGET_GLUTEN_BRANCH" | awk '{print $1;}')"
+
+if [ -z "$TARGET_GLUTEN_COMMIT" ]
+then
+  echo "Unable to parse TARGET_GLUTEN_COMMIT."
+  exit 1
+fi
+
+## Assemble the docker build arguments
+
+BUILD_DOCKER_BUILD_ARGS=
+
+BUILD_DOCKER_BUILD_ARGS="$BUILD_DOCKER_BUILD_ARGS --ulimit nofile=8192:8192"
+BUILD_DOCKER_BUILD_ARGS="$BUILD_DOCKER_BUILD_ARGS --build-arg DOCKER_TARGET_IMAGE_BUILDENV_WITH_OS_IMAGE=$DOCKER_TARGET_IMAGE_BUILDENV_WITH_OS_IMAGE"
+BUILD_DOCKER_BUILD_ARGS="$BUILD_DOCKER_BUILD_ARGS --build-arg JDK_DEBUG_BUILD=$JDK_DEBUG_BUILD"
+BUILD_DOCKER_BUILD_ARGS="$BUILD_DOCKER_BUILD_ARGS --build-arg GLUTEN_DEBUG_BUILD=$GLUTEN_DEBUG_BUILD"
+BUILD_DOCKER_BUILD_ARGS="$BUILD_DOCKER_BUILD_ARGS --build-arg TARGET_GLUTEN_REPO=$TARGET_GLUTEN_REPO"
+BUILD_DOCKER_BUILD_ARGS="$BUILD_DOCKER_BUILD_ARGS --build-arg TARGET_GLUTEN_COMMIT=$TARGET_GLUTEN_COMMIT"
+BUILD_DOCKER_BUILD_ARGS="$BUILD_DOCKER_BUILD_ARGS --build-arg CACHE_GLUTEN_REPO=$CACHE_GLUTEN_REPO"
+BUILD_DOCKER_BUILD_ARGS="$BUILD_DOCKER_BUILD_ARGS --build-arg CACHE_GLUTEN_BRANCH=$CACHE_GLUTEN_BRANCH"
+BUILD_DOCKER_BUILD_ARGS="$BUILD_DOCKER_BUILD_ARGS --build-arg BUILD_BACKEND_TYPE=$BUILD_BACKEND_TYPE"
+BUILD_DOCKER_BUILD_ARGS="$BUILD_DOCKER_BUILD_ARGS -f $BASEDIR/dockerfile-build"
+BUILD_DOCKER_BUILD_ARGS="$BUILD_DOCKER_BUILD_ARGS --target gluten-build"
+BUILD_DOCKER_BUILD_ARGS="$BUILD_DOCKER_BUILD_ARGS -t $DOCKER_TARGET_IMAGE_BUILD_WITH_OS_IMAGE"
+
+if [ -n "$DOCKER_CACHE_IMAGE" ]
+then
+  BUILD_DOCKER_BUILD_ARGS="$BUILD_DOCKER_BUILD_ARGS --cache-from $DOCKER_CACHE_IMAGE"
+fi
+
+BUILD_DOCKER_BUILD_ARGS="$BUILD_DOCKER_BUILD_ARGS $BASEDIR"
+# Left unquoted on purpose: the string has to word-split into individual docker arguments.
+docker build $BUILD_DOCKER_BUILD_ARGS
+
+# EOF
diff --git a/tools/gluten-te/centos/centos-7-deps.sh b/tools/gluten-te/centos/centos-7-deps.sh
index 9b4cf1cf9890..4971efc94511 100755
--- a/tools/gluten-te/centos/centos-7-deps.sh
+++ b/tools/gluten-te/centos/centos-7-deps.sh
@@ -39,6 +39,7 @@ yum -y install \
     git \
     dnf \
     cmake3 \
+    ccache \
     devtoolset-9 \
     java-1.8.0-openjdk \
     java-1.8.0-openjdk-devel \
diff --git a/tools/gluten-te/centos/dockerfile-build b/tools/gluten-te/centos/dockerfile-build
new file mode 100644
index 000000000000..c8686768a829
--- /dev/null
+++ b/tools/gluten-te/centos/dockerfile-build
@@ -0,0 +1,101 @@
+ARG DOCKER_TARGET_IMAGE_BUILDENV_WITH_OS_IMAGE
+
+FROM $DOCKER_TARGET_IMAGE_BUILDENV_WITH_OS_IMAGE AS gluten-build
+MAINTAINER Hongze Zhang
+
+# Whether debug build is enabled
+ARG JDK_DEBUG_BUILD
+ARG GLUTEN_DEBUG_BUILD
+RUN echo "JDK debug build is [$JDK_DEBUG_BUILD]!"
+RUN echo "Gluten debug build is [$GLUTEN_DEBUG_BUILD]!"
+
+# If JDK debug is on: remove the yum-installed JDK/Maven, then unpack a fastdebug JDK 8 and Maven 3.6.3 under /opt
+RUN if [ "$JDK_DEBUG_BUILD" = "ON" ]; \
+    then \
+      yum -y remove java-1.8.0-openjdk-devel; \
+      yum -y remove java-1.8.0-openjdk; \
+      yum -y remove maven; \
+      mkdir -p /opt/jdk/ \
+      && mkdir -p /opt/maven/ \
+      && cd /opt/jdk/ \
+      && wget https://builds.shipilev.net/openjdk-jdk8/openjdk-jdk8-linux-x86_64-server-fastdebug-gcc8-glibc2.28.tar.xz \
+      && tar -xvf openjdk-jdk8-linux-x86_64-server-fastdebug-gcc8-glibc2.28.tar.xz \
+      && rm -f openjdk-jdk8-linux-x86_64-server-fastdebug-gcc8-glibc2.28.tar.xz \
+      && cd /opt/maven/ \
+      && wget https://dlcdn.apache.org/maven/maven-3/3.6.3/binaries/apache-maven-3.6.3-bin.tar.gz \
+      && tar -xvf apache-maven-3.6.3-bin.tar.gz \
+      && rm -f apache-maven-3.6.3-bin.tar.gz \
+      && cp -rs /opt/jdk/j2sdk-image/bin/* /usr/local/bin/ \
+      && cp -rs /opt/maven/apache-maven-3.6.3/bin/mvn /usr/local/bin/ \
+      && echo "JAVA_HOME=/opt/jdk/j2sdk-image" > ~/.mavenrc; \
+    fi
+
+# These branches are mainly for pre-downloading dependencies to speed-up builds.
+# Thus it should not be required to change these values every time when the build branch
+# is changed.
+ARG CACHE_GLUTEN_REPO
+ARG CACHE_GLUTEN_BRANCH
+
+RUN test -n "$CACHE_GLUTEN_REPO" || (echo "CACHE_GLUTEN_REPO not set" && false)
+RUN test -n "$CACHE_GLUTEN_BRANCH" || (echo "CACHE_GLUTEN_BRANCH not set" && false)
+
+RUN cd /opt/ \
+    && git clone "$CACHE_GLUTEN_REPO" -b "$CACHE_GLUTEN_BRANCH" gluten
+
+# Cap the ccache size at 128G, then print the cache statistics
+RUN ccache -M 128G
+RUN ccache -s
+
+# Default Gluten Maven build options (empty as of now)
+ENV GLUTEN_MAVEN_OPTIONS=
+#RUN set-login-env "GLUTEN_MAVEN_OPTIONS="
+
+ARG BUILD_BACKEND_TYPE
+
+RUN test -n "$BUILD_BACKEND_TYPE" || (echo "BUILD_BACKEND_TYPE not set" && false)
+
+RUN if [ "$BUILD_BACKEND_TYPE" = "velox" ]; \
+    then \
+      if [ "$GLUTEN_DEBUG_BUILD" = "ON" ]; then GLUTEN_BUILD_TYPE="Debug"; else GLUTEN_BUILD_TYPE="Release"; fi; \
+      DEPS_INSTALL_SCRIPT="source /env.sh && bash /opt/gluten/dev/builddeps-veloxbe.sh \
+        --enable_hdfs=ON --enable_s3=ON --enable_gcs=ON \
+        --build_type=$GLUTEN_BUILD_TYPE --enable_ep_cache=ON"; \
+      EXTRA_MAVEN_OPTIONS="-Pspark-3.2 \
+        -Pbackends-velox \
+        -Prss \
+        -DskipTests \
+        -Dscalastyle.skip=true \
+        -Dcheckstyle.skip=true"; \
+    else \
+      echo "Unrecognizable backend type: $BUILD_BACKEND_TYPE"; \
+      exit 1; \
+    fi \
+    && echo $EXTRA_MAVEN_OPTIONS > ~/.gluten-mvn-options \
+    && echo $DEPS_INSTALL_SCRIPT > ~/.gluten-deps-install-script
+
+# Prebuild Gluten on the cache branch, using the options saved to the ~/.gluten-* files above
+RUN EXTRA_MAVEN_OPTIONS=$(cat ~/.gluten-mvn-options) \
+    DEPS_INSTALL_SCRIPT=$(cat ~/.gluten-deps-install-script) \
+    && cd /opt/gluten \
+    && bash -c "$DEPS_INSTALL_SCRIPT" \
+    && bash -c "mvn clean install $GLUTEN_MAVEN_OPTIONS $EXTRA_MAVEN_OPTIONS"
+
+# Build Gluten at the requested target commit
+ARG TARGET_GLUTEN_REPO
+ARG TARGET_GLUTEN_COMMIT
+
+RUN test -n "$TARGET_GLUTEN_REPO" || (echo "TARGET_GLUTEN_REPO not set" && false)
+RUN test -n "$TARGET_GLUTEN_COMMIT" || (echo "TARGET_GLUTEN_COMMIT not set" && false)
+
+RUN cd /opt/gluten \
+    && git fetch "$TARGET_GLUTEN_REPO" "$TARGET_GLUTEN_COMMIT:build_$TARGET_GLUTEN_COMMIT" \
+    && git checkout "build_$TARGET_GLUTEN_COMMIT"
+
+RUN EXTRA_MAVEN_OPTIONS=$(cat ~/.gluten-mvn-options) \
+    DEPS_INSTALL_SCRIPT=$(cat ~/.gluten-deps-install-script) \
+    && cd /opt/gluten \
+    && bash -c "$DEPS_INSTALL_SCRIPT" \
+    && bash -c "mvn clean install $GLUTEN_MAVEN_OPTIONS $EXTRA_MAVEN_OPTIONS" \
+    && bash -c "mv ep/build-velox/build/velox_ep /opt/velox"
+
+# EOF
diff --git a/tools/gluten-te/ubuntu/dockerfile-build b/tools/gluten-te/ubuntu/dockerfile-build
index cc0738342670..26d6a35f8b18 100644
--- a/tools/gluten-te/ubuntu/dockerfile-build
+++ b/tools/gluten-te/ubuntu/dockerfile-build
@@ -57,7 +57,9 @@ RUN test -n "$BUILD_BACKEND_TYPE" || (echo "BUILD_BACKEND_TYPE not set" && false
 RUN if [ "$BUILD_BACKEND_TYPE" == "velox" ]; \
     then \
       if [ "$GLUTEN_DEBUG_BUILD" == "ON" ]; then GLUTEN_BUILD_TYPE="Debug"; else GLUTEN_BUILD_TYPE="Release"; fi; \
-      DEPS_INSTALL_SCRIPT="bash /opt/gluten/dev/builddeps-veloxbe.sh --build_type=$GLUTEN_BUILD_TYPE --enable_ep_cache=ON"; \
+      DEPS_INSTALL_SCRIPT="bash /opt/gluten/dev/builddeps-veloxbe.sh \
+        --enable_hdfs=ON --enable_s3=ON --enable_gcs=ON \
+        --build_type=$GLUTEN_BUILD_TYPE --enable_ep_cache=ON"; \
       EXTRA_MAVEN_OPTIONS="-Pspark-3.2 \
         -Pbackends-velox \
         -Prss \
@@ -93,6 +95,7 @@ RUN EXTRA_MAVEN_OPTIONS=$(cat ~/.gluten-mvn-options) \
     DEPS_INSTALL_SCRIPT=$(cat ~/.gluten-deps-install-script) \
     && cd /opt/gluten \
     && bash -c "$DEPS_INSTALL_SCRIPT" \
-    && bash -c "mvn clean install $GLUTEN_MAVEN_OPTIONS $EXTRA_MAVEN_OPTIONS"
+    && bash -c "mvn clean install $GLUTEN_MAVEN_OPTIONS $EXTRA_MAVEN_OPTIONS" \
+    && bash -c "mv ep/build-velox/build/velox_ep /opt/velox"
 
 # EOF
diff --git a/tools/gluten-te/ubuntu/dockerfile-buildenv b/tools/gluten-te/ubuntu/dockerfile-buildenv
index 28af876d2b80..fb88d170a8e5 100644
--- a/tools/gluten-te/ubuntu/dockerfile-buildenv
+++ b/tools/gluten-te/ubuntu/dockerfile-buildenv
@@ -110,6 +110,10 @@ RUN cd /opt/spark322 && ./build/mvn -Pyarn -DskipTests clean install
 RUN cd /opt && git clone --depth 1 --branch v3.3.1 https://github.com/apache/spark.git spark331
 RUN cd /opt/spark331 && ./build/mvn -Pyarn -DskipTests clean install
 
+# Build & install Spark 3.4.1
+RUN cd /opt && git clone --depth 1 --branch v3.4.1 https://github.com/apache/spark.git spark341
+RUN cd /opt/spark341 && ./build/mvn -Pyarn -DskipTests clean install
+
 # Prepare entry command
 COPY scripts/cmd.sh /root/.cmd.sh
 CMD ["/root/.cmd.sh"]