From b22ec236cee0bb0d22f7e921af48b79a2630c33d Mon Sep 17 00:00:00 2001 From: Ping Yu Date: Tue, 8 Nov 2022 19:13:03 +0800 Subject: [PATCH] [to #295] Speed up cdc integrated tests (#294) * speed up integrated tests Signed-off-by: pingyu * wip Signed-off-by: pingyu * update release version; add usage comment for up.sh Signed-off-by: pingyu * print more debug info Signed-off-by: pingyu * fix fmt Signed-off-by: pingyu * add shellcheck Signed-off-by: pingyu * fix CI error Signed-off-by: pingyu * remove "br" in comment Signed-off-by: pingyu * remove useless cdc target Signed-off-by: pingyu Signed-off-by: pingyu --- cdc/Makefile | 12 +- .../docker/integration-test.Dockerfile | 26 ++-- .../integration_tests/_utils/check_sync_diff | 4 +- .../integration_tests/_utils/run_cdc_server | 2 +- cdc/tests/integration_tests/_utils/run_curl | 38 ++++++ .../_utils/start_tidb_cluster_impl | 62 ++++++--- .../integration_tests/cdc_hang_on/run.sh | 35 +++-- cdc/tests/tests.Dockerfile | 47 +++++++ cdc/tests/up.sh | 123 ++++++++++++++++++ 9 files changed, 300 insertions(+), 49 deletions(-) create mode 100755 cdc/tests/integration_tests/_utils/run_curl create mode 100644 cdc/tests/tests.Dockerfile create mode 100755 cdc/tests/up.sh diff --git a/cdc/Makefile b/cdc/Makefile index 72fc5ddb..884a6789 100644 --- a/cdc/Makefile +++ b/cdc/Makefile @@ -43,7 +43,7 @@ FAILPOINT_DISABLE := $$(echo $(FAILPOINT_DIR) | xargs $(FAILPOINT) disable >/dev RELEASE_VERSION = ifeq ($(RELEASE_VERSION),) - RELEASE_VERSION := v1.0.0-master + RELEASE_VERSION := v1.1.0-master release_version_regex := ^cdc-v[0-9]\..*$$ release_branch_regex := "^cdc-[0-9]\.[0-9].*$$|^HEAD$$|^.*/*tags/cdc-v[0-9]\.[0-9]\..*$$" ifneq ($(shell git rev-parse --abbrev-ref HEAD | egrep $(release_branch_regex)),) @@ -69,6 +69,8 @@ LDFLAGS += -X "$(CDC_PKG)/pkg/version.GitHash=$(GITHASH)" LDFLAGS += -X "$(CDC_PKG)/pkg/version.GitBranch=$(GITBRANCH)" LDFLAGS += -X "$(CDC_PKG)/pkg/version.GoVersion=$(GOVERSION)" +SCVERSION := stable + default: 
build buildsucc buildsucc: @@ -276,3 +278,11 @@ check_third_party_binary: @which scripts/bin/go-ycsb @which scripts/bin/etcdctl +shellcheck: tools/bin/shellcheck + tools/bin/shellcheck ./**/*.sh + +tools/bin/shellcheck: + wget -qO- "https://github.com/koalaman/shellcheck/releases/download/$(SCVERSION)/shellcheck-$(SCVERSION).$(ARCH).x86_64.tar.xz" | tar -xJv + mv "shellcheck-$(SCVERSION)/shellcheck" tools/bin/ + chmod +x tools/bin/shellcheck + rm -rf "shellcheck-$(SCVERSION)" diff --git a/cdc/deployments/tikv-cdc/docker/integration-test.Dockerfile b/cdc/deployments/tikv-cdc/docker/integration-test.Dockerfile index 03371cfa..2d216336 100644 --- a/cdc/deployments/tikv-cdc/docker/integration-test.Dockerfile +++ b/cdc/deployments/tikv-cdc/docker/integration-test.Dockerfile @@ -11,9 +11,9 @@ RUN ./download-integration-test-binaries.sh master RUN ls ./bin # Download go into /usr/local dir. -ENV GOLANG_VERSION 1.16.4 +ENV GOLANG_VERSION 1.18.5 ENV GOLANG_DOWNLOAD_URL https://dl.google.com/go/go$GOLANG_VERSION.linux-amd64.tar.gz -ENV GOLANG_DOWNLOAD_SHA256 7154e88f5a8047aad4b80ebace58a059e36e7e2e4eb3b383127a28c711b4ff59 +ENV GOLANG_DOWNLOAD_SHA256 9e5de37f9c49942c601b191ac5fba404b868bfc21d446d6960acc12283d6e5f2 RUN curl -fsSL "$GOLANG_DOWNLOAD_URL" -o golang.tar.gz \ && echo "$GOLANG_DOWNLOAD_SHA256 golang.tar.gz" | sha256sum -c - \ && tar -C /usr/local -xzf golang.tar.gz \ @@ -29,19 +29,17 @@ RUN yum install -y \ git \ bash-completion \ wget \ - which \ + which \ gcc \ make \ - curl \ - tar \ - musl-dev \ - psmisc + curl \ + tar \ + musl-dev \ + psmisc \ + mysql RUN wget http://dl.fedoraproject.org/pub/epel/epel-release-latest-7.noarch.rpm RUN yum install -y epel-release-latest-7.noarch.rpm RUN yum --enablerepo=epel install -y s3cmd -RUN wget -i -c http://dev.mysql.com/get/mysql57-community-release-el7-10.noarch.rpm -RUN yum install -y mysql57-community-release-el7-10.noarch.rpm -RUN yum install -y mysql-server # Copy go form downloader. 
COPY --from=downloader /usr/local/go /usr/local/go @@ -49,12 +47,12 @@ ENV GOPATH /go ENV GOROOT /usr/local/go ENV PATH $GOPATH/bin:$GOROOT/bin:$PATH -WORKDIR /go/src/github.com/pingcap/tiflow +WORKDIR /go/src/github.com/tikv/migration/cdc COPY . . -# Clean bin dir and build TiCDC. +# Clean bin dir and build TiKV-CDC. # We always need to clean before we build, please don't adjust its order. RUN make clean -RUN make integration_test_build kafka_consumer cdc -COPY --from=downloader /root/download/bin/* ./bin/ +RUN make integration_test_build +COPY --from=downloader /root/download/bin/* ./scripts/bin/ RUN make check_third_party_binary diff --git a/cdc/tests/integration_tests/_utils/check_sync_diff b/cdc/tests/integration_tests/_utils/check_sync_diff index cd1dd3d4..c0e3063c 100755 --- a/cdc/tests/integration_tests/_utils/check_sync_diff +++ b/cdc/tests/integration_tests/_utils/check_sync_diff @@ -24,6 +24,8 @@ if ! command -v rawkv_data &>/dev/null; then fi set +e +sleep 1 # sleep to avoid unnecessary checksum, as many existed codes check sync just after rawkv_op + cd $workdir i=0 while [ $i -lt $check_time ]; do @@ -36,7 +38,7 @@ while [ $i -lt $check_time ]; do fi ((i++)) echo "check diff failed $i-th time, retry later" - sleep 5 + sleep 1 done if [ $i -ge $check_time ]; then diff --git a/cdc/tests/integration_tests/_utils/run_cdc_server b/cdc/tests/integration_tests/_utils/run_cdc_server index 61773121..1938eab8 100755 --- a/cdc/tests/integration_tests/_utils/run_cdc_server +++ b/cdc/tests/integration_tests/_utils/run_cdc_server @@ -161,7 +161,7 @@ for ((i = 0; i <= 50; i++)); do echo 'Failed to start TiKV-CDC' exit 1 fi - sleep 3 + sleep 1 done set +x diff --git a/cdc/tests/integration_tests/_utils/run_curl b/cdc/tests/integration_tests/_utils/run_curl new file mode 100755 index 00000000..c0810cf9 --- /dev/null +++ b/cdc/tests/integration_tests/_utils/run_curl @@ -0,0 +1,38 @@ +#!/bin/bash +# +# Copyright 2020 PingCAP, Inc. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Clone from https://github.com/pingcap/tidb/blob/master/br/tests/_utils/run_curl + +# Usage: run_curl https://url '{"json":"data"}' + +set -eu + +CUR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +TLS_DIR=$(cd "$CUR/../_certificates" && pwd) + +if [ -z "${2-}" ]; then + POST_ARGS= +else + POST_ARGS="--post-data $2" +fi + +# FIXME: use `wget` instead of `curl` because the latter rejects ECC certs on our CI. +# CentOS is re~~ally old. +wget -q -O - \ + --ca-certificate="$TLS_DIR/ca.pem" \ + --certificate="$TLS_DIR/server.pem" \ + --private-key="$TLS_DIR/server-key.pem" \ + $POST_ARGS "$1" diff --git a/cdc/tests/integration_tests/_utils/start_tidb_cluster_impl b/cdc/tests/integration_tests/_utils/start_tidb_cluster_impl index 15d242ff..24e90cee 100755 --- a/cdc/tests/integration_tests/_utils/start_tidb_cluster_impl +++ b/cdc/tests/integration_tests/_utils/start_tidb_cluster_impl @@ -11,6 +11,7 @@ tidb_config= pd_config= multiple_upstream_pd= random_file_name= +verify_tidb=false # Random generate the sockets config. # Make sure we dont use the same sock. @@ -43,6 +44,9 @@ while [[ ${1} ]]; do multiple_upstream_pd=${2} shift ;; + --verify-tidb) + verify_tidb=true + ;; *) echo "Unknown parameter: ${1}" >&2 exit 1 @@ -209,7 +213,20 @@ tikv-server \ -C "$OUT_DIR/tikv-config.toml" \ -s "$OUT_DIR/tikv_down" & -sleep 2 +ensure_tikv() { + PD_ADDR=$1 i=0 # reset the retry counter so every call gets its own retry budget + echo "Waiting initializing TiKV..." + while ! 
run_curl "http://$PD_ADDR/pd/api/v1/cluster/status" | grep '"is_initialized": true'; do + i=$((i + 1)) + if [ "$i" -gt 20 ]; then + echo 'Failed to initialize TiKV cluster' + return 1 + fi + sleep 1 + done +} +ensure_tikv "${UP_PD_HOST_1}:${UP_PD_PORT_1}" +ensure_tikv "${DOWN_PD_HOST}:${DOWN_PD_PORT}" echo "Starting Upstream TiDB..." randomGenSocketsConf @@ -231,27 +248,30 @@ tidb-server \ --status=${DOWN_TIDB_STATUS} \ --log-file "$OUT_DIR/tidb_down.log" & -echo "Verifying Upstream TiDB is started..." -i=0 -while ! mysql -uroot -h${UP_TIDB_HOST} -P${UP_TIDB_PORT} --default-character-set utf8mb4 -e 'select * from mysql.tidb;'; do - i=$((i + 1)) - if [ "$i" -gt 60 ]; then - echo 'Failed to start upstream TiDB' - exit 2 - fi - sleep 2 -done +# Skip verify tidb to speed up +if [ "$verify_tidb" = true ]; then + echo "Verifying Upstream TiDB is started..." + i=0 + while ! mysql -uroot -h${UP_TIDB_HOST} -P${UP_TIDB_PORT} --default-character-set utf8mb4 -e 'select * from mysql.tidb;'; do + i=$((i + 1)) + if [ "$i" -gt 60 ]; then + echo 'Failed to start upstream TiDB' + exit 2 + fi + sleep 2 + done -echo "Verifying Downstream TiDB is started..." -i=0 -while ! mysql -uroot -h${DOWN_TIDB_HOST} -P${DOWN_TIDB_PORT} --default-character-set utf8mb4 -e 'select * from mysql.tidb;'; do - i=$((i + 1)) - if [ "$i" -gt 60 ]; then - echo 'Failed to start downstream TiDB' - exit 1 - fi - sleep 2 -done + echo "Verifying Downstream TiDB is started..." + i=0 + while ! mysql -uroot -h${DOWN_TIDB_HOST} -P${DOWN_TIDB_PORT} --default-character-set utf8mb4 -e 'select * from mysql.tidb;'; do + i=$((i + 1)) + if [ "$i" -gt 60 ]; then + echo 'Failed to start downstream TiDB' + exit 1 + fi + sleep 2 + done +fi echo "Starting CDC state checker..." if ! 
command -v cdc_state_checker &>/dev/null; then diff --git a/cdc/tests/integration_tests/cdc_hang_on/run.sh b/cdc/tests/integration_tests/cdc_hang_on/run.sh index a1fb8c6a..9b1e7473 100644 --- a/cdc/tests/integration_tests/cdc_hang_on/run.sh +++ b/cdc/tests/integration_tests/cdc_hang_on/run.sh @@ -12,7 +12,7 @@ DOWN_PD=http://$DOWN_PD_HOST:$DOWN_PD_PORT RETRY_TIME=10 function restart_cdc() { id=$1 - count=$(ps -aux | grep "tikv-cdc.test" | grep "cdc$id.log" | wc | awk '{print $1}') + local count=$(ps -aux | grep "tikv-cdc.test" | grep "cdc$id.log" | wc | awk '{print $1}') if [ "$count" -eq 0 ]; then echo "restart cdc$id" run_cdc_server --workdir $WORK_DIR --binary $CDC_BINARY --logsuffix "$id" --addr "127.0.0.1:860$id" --pd "$UP_PD" @@ -22,22 +22,26 @@ function restart_cdc() { function check_capture_count() { expected=$1 - for ((i = 0; i <= 10; i++)); do - count=$(tikv-cdc cli capture list --pd=$UP_PD | jq '.|length') + local max_retry=60 + local i + for ((i = 0; i <= $max_retry; i++)); do + local captures=$(tikv-cdc cli capture list --pd=$UP_PD) + local count=$(echo "$captures" | jq '.|length') if [[ "$count" == "$expected" ]]; then echo "check capture count successfully" break fi - if [ "$i" -eq 10 ]; then - echo "failed to check capture count, expected: $expected, got: $count" + echo "failed to check capture count, expected: $expected, got: $count, retry: $i" + echo "captures: $captures" + if [ "$i" -eq "$max_retry" ]; then + echo "failed to check capture count, max retries exceeded" exit 1 fi - echo "failed to check capture count, expected: $expected, got: $count, retry: $i" # when sent SIGSTOP to pd leader, cdc maybe exit that is expect, and we # shoule restart it restart_cdc 1 restart_cdc 2 - sleep 10 + sleep 1 done } @@ -51,15 +55,24 @@ function run() { run_cdc_server --workdir $WORK_DIR --binary $CDC_BINARY --logsuffix "1" --addr "127.0.0.1:8600" --pd "$UP_PD" run_cdc_server --workdir $WORK_DIR --binary $CDC_BINARY --logsuffix "2" --addr "127.0.0.1:8601" --pd 
"$UP_PD" - for i in {1..10}; do + local i=1 + while [ $i -le 10 ]; do echo "cdc_hang_on test $i" - name=$(pd-ctl member --pd=$UP_PD | jq ."leader" | jq ."name" | tr -d '"') - pid=$(ps -aux | grep "name=$name" | awk '{print $2}' | head -n1) + member="$(pd-ctl member --pd=$UP_PD)" + name=$(echo $member | jq ."leader" | jq ."name" | tr -d '"') + if ! [[ "$name" =~ ^pd[0-9]+ ]]; then + echo "pd leader not found: $member" + sleep 1 + continue + fi + echo "pd leader: $name" + pid=$(pgrep -f "name=$name" | head -n1) kill -19 $pid sleep 10 check_capture_count 2 kill -18 $pid - sleep 10 + sleep 1 + ((i++)) done cleanup_process $CDC_BINARY diff --git a/cdc/tests/tests.Dockerfile b/cdc/tests/tests.Dockerfile new file mode 100644 index 00000000..d43f5103 --- /dev/null +++ b/cdc/tests/tests.Dockerfile @@ -0,0 +1,47 @@ +# Copy from deployments/tikv-cdc/docker/integration-test.Dockerfile, +# but use local files other than COPY +# amd64 only +FROM amd64/centos:centos7 as downloader + +USER root +WORKDIR /root/download + +# Download go into /usr/local dir. +ENV GOLANG_VERSION 1.18.5 +ENV GOLANG_DOWNLOAD_URL https://dl.google.com/go/go${GOLANG_VERSION}.linux-amd64.tar.gz +ENV GOLANG_DOWNLOAD_SHA256 9e5de37f9c49942c601b191ac5fba404b868bfc21d446d6960acc12283d6e5f2 +RUN curl -fsSL "$GOLANG_DOWNLOAD_URL" -o golang.tar.gz \ + && echo "$GOLANG_DOWNLOAD_SHA256 golang.tar.gz" | sha256sum -c - \ + && tar -C /usr/local -xzf golang.tar.gz \ + && rm golang.tar.gz + +FROM amd64/centos:centos7 + +USER root +WORKDIR /root + +# Installing dependencies. 
+RUN yum install -y \ + git \ + bash-completion \ + wget \ + which \ + gcc \ + make \ + curl \ + tar \ + psmisc \ + mysql + +RUN wget http://dl.fedoraproject.org/pub/epel/epel-release-latest-7.noarch.rpm +RUN yum install -y epel-release-latest-7.noarch.rpm +RUN yum --enablerepo=epel install -y s3cmd jq + +COPY --from=downloader /usr/local/go /usr/local/go +ENV GOPATH /go +ENV GOROOT /usr/local/go +ENV PATH $GOPATH/bin:$GOROOT/bin:$PATH + +WORKDIR /cdc + +ENTRYPOINT ["/bin/bash"] diff --git a/cdc/tests/up.sh b/cdc/tests/up.sh new file mode 100755 index 00000000..6f5c5d8c --- /dev/null +++ b/cdc/tests/up.sh @@ -0,0 +1,123 @@ +#!/usr/bin/env bash +# fork from https://github.com/pingcap/tidb/blob/master/br/tests/up.sh +# +# Use for creating a container to run integrated tests in an isolated environment. +# +# Usage: +# 1. Run up.sh from cdc directory: tests/up.sh +# 2. Run: make integration_test +# + +set -eo pipefail + +IMAGE_TAG="nightly" +while [[ $# -gt 0 ]]; do + key="$1" + + case $key in + --tag) + IMAGE_TAG=$2 + shift + shift + ;; + --cleanup-docker) + CLEANUP_DOCKER=1 + shift + ;; + --cleanup-data) + CLEANUP_DATA=1 + shift + ;; + --cleanup-all) + CLEANUP_ALL=1 + shift + ;; + --help) + HELP=1 + shift + ;; + *) + HELP=1 + break + ;; + esac +done + +if [ "$HELP" ]; then + echo "Usage: $0 [OPTIONS]" + echo "OPTIONS:" + echo " --help Display this message" + echo " --tag (TAG) Specify images tag used in tests" + echo " --cleanup-docker Clean up tests Docker containers" + echo " --cleanup-data Clean up persistent data" + echo " --cleanup-all Clean up all data including Docker images, containers and persistent data" + exit 0 +fi + +host_tmp=/tmp/tikv_cdc_test_$USER +host_bash_history=$host_tmp/bash_history + +# Persist tests data and bash history +mkdir -p "$host_tmp" +touch "$host_bash_history" || true +function cleanup_data() { + rm -rf "$host_tmp" || { + echo try "sudo rm -rf $host_tmp"? 
+ exit 1 + } +} +if [ "$CLEANUP_DATA" ]; then + cleanup_data + exit 0 +fi + +# Clean up docker images and containers. +docker_repo=tikv_cdc_tests +function cleanup_docker_containers() { + containers=$(docker container ps --all --filter="ancestor=$docker_repo:$IMAGE_TAG" -q) + if [ "$containers" ]; then + echo "$containers" | xargs docker stop + echo "$containers" | xargs docker rm + fi +} +function cleanup_docker_images() { + images=$(docker images --filter="reference=$docker_repo:$IMAGE_TAG" -q) + if [ "$images" ]; then + echo "$images" | xargs docker rmi + fi +} +if [ "$CLEANUP_DOCKER" ]; then + cleanup_docker_containers + exit 0 +fi + +if [ "$CLEANUP_ALL" ]; then + cleanup_data + cleanup_docker_containers + cleanup_docker_images + exit 0 +fi + +docker build -t "$docker_repo":"$IMAGE_TAG" -f tests/tests.Dockerfile . + +# Start an existing container or create and run a new container. +exist_container=$(docker container ps --all -q --filter="ancestor=$docker_repo:$IMAGE_TAG" --filter="status=exited" | head -n 1) +if [ "$exist_container" ]; then + docker start "$exist_container" + echo "Attach existing container: $exist_container" + exec docker attach "$exist_container" +else + volume_args=() + for f in * .[^.]*; do + volume_args+=(-v "$(pwd)/$f:/cdc/$f") + done + echo "Run a new container" + echo "Run \"make integration_test\" to start integrated tests" + # volume_args holds one array element per word, so the quoted expansion keeps paths with spaces intact + exec docker run -it \ + -v "$host_tmp":/tmp/tikv_cdc_test \ + -v "$host_bash_history":/root/.bash_history \ + "${volume_args[@]}" \ + --cpus=8 --memory=16g \ + "$docker_repo":"$IMAGE_TAG" +fi